东方耀AI技术分享
标题:
目标检测之coco与voc格式的数据相互转换,并验证coco格式的...
[打印本页]
作者:
东方耀
时间:
2020-12-1 15:58
标题:
目标检测之coco与voc格式的数据相互转换,并验证coco格式的...
目标检测之coco与voc格式的数据相互转换,并验证coco格式的脚本
# -*- coding: utf-8 -*-
__author__ = u'江锦炬 微信:dfy_88888'
__date__ = '2020/12/1 上午11:27'
__product__ = 'PyCharm'
__filename__ = '08_coco_to_voc'
# import os, cv2, sys, shutil
import cv2
import shutil
import glob
import os
from xml.dom.minidom import Document
# Convert a COCO/YOLO-format object-detection dataset to VOC format, which is
# convenient for k-means clustering of anchors.
# Source directories; convert_imgset globs "*.jpg" and "*.txt" inside them.
train_dir_coco = "/media/jiang/AI_DataSets/DataSets/person_vehicle_coco/person_vehicle_yolo_train"
val_dir_coco = "/media/jiang/AI_DataSets/DataSets/person_vehicle_coco/person_vehicle_yolo_val"
# Class id (as it appears in the txt label files) -> class name written to the XML.
label_dict = {"0": "person", "1": "car", "2": "motorbike", "3": "bus", "4": "truck"}
# Root of the VOC output tree (JPEGImages / Annotations / ImageSets/Main live below it).
output_dir_voc = "/media/jiang/AI_DataSets/DataSets/person_vehicle_voc"
def writexml(img_path, ori_hw, bboxes, xmlpath):
    """Write one Pascal VOC style annotation XML file.

    :param img_path: text stored verbatim in the ``<filename>`` element.
    :param ori_hw: ``(height, width)`` of the image; index 0 goes to
        ``<height>`` and index 1 to ``<width>``.
    :param bboxes: iterable of ``[class_name, xmin, ymin, xmax, ymax]``
        entries; one ``<object>`` element is emitted per entry.
    :param xmlpath: destination path of the XML file (overwritten if present).
    :return: None
    """
    doc = Document()

    def add_node(parent, tag, text=None):
        # Append <tag> to parent; when text is given, wrap it in a text node.
        node = doc.createElement(tag)
        if text is not None:
            node.appendChild(doc.createTextNode(str(text)))
        parent.appendChild(node)
        return node

    # Root element.
    annotation = add_node(doc, 'annotation')
    # Name of the folder that holds the VOC-format dataset.
    add_node(annotation, 'folder', 'person_vehicle_voc')
    # NOTE(review): convert_imgset passes the full source image path here, so
    # <filename> holds a path rather than a bare file name -- confirm that the
    # downstream VOC readers accept this before changing it.
    add_node(annotation, 'filename', img_path)

    source = add_node(annotation, 'source')
    add_node(source, 'database', 'coco Database')
    add_node(source, 'annotation', 'jjj VOC2020')
    add_node(source, 'image', 'flickr')
    add_node(source, 'flickrid', '-1')

    owner = add_node(annotation, 'owner')
    add_node(owner, 'flickrid', 'jjj_88888')
    add_node(owner, 'name', 'jjj_88888')

    size = add_node(annotation, 'size')
    add_node(size, 'width', ori_hw[1])
    add_node(size, 'height', ori_hw[0])
    # Depth is hard-coded to 3 channels.
    add_node(size, 'depth', "3")

    add_node(annotation, 'segmented', '0')

    for bbox in bboxes:
        # bbox looks like ['person', 412, 157, 465, 295]
        obj = add_node(annotation, 'object')
        add_node(obj, 'name', bbox[0])
        add_node(obj, 'pose', 'Unspecified')
        add_node(obj, 'truncated', '1')
        add_node(obj, 'difficult', '0')
        bndbox = add_node(obj, 'bndbox')
        # (xmin, ymin) is the top-left corner of the box.
        add_node(bndbox, 'xmin', bbox[1])
        add_node(bndbox, 'ymin', bbox[2])
        # (xmax, ymax) is the bottom-right corner of the box.
        add_node(bndbox, 'xmax', bbox[3])
        add_node(bndbox, 'ymax', bbox[4])

    # toprettyxml() without an encoding argument emits an XML declaration that
    # names no encoding, which XML readers interpret as UTF-8.  Write the file
    # as UTF-8 explicitly so non-ASCII paths or class names stay readable
    # regardless of the platform's locale encoding.
    with open(xmlpath, 'w', encoding='utf-8') as f:
        f.write(doc.toprettyxml(indent=''))
def _read_yolo_bboxes(txt_path, ori_w, ori_h):
    """Parse a YOLO label file into ``[class_name, x1, y1, x2, y2]`` pixel boxes."""
    bboxes = []
    with open(txt_path, "r") as f:
        for line in f:
            # Split on any whitespace so repeated spaces/tabs are tolerated.
            fields = line.split()
            if not fields:
                # Blank line (e.g. trailing newline at end of file).
                continue
            # Line layout: "<class_id> <cx> <cy> <w> <h>", all normalised to [0, 1].
            class_id, cx, cy, w, h = fields
            cx, cy, w, h = float(cx), float(cy), float(w), float(h)
            bboxes.append([
                label_dict[class_id],
                int((cx - w / 2) * ori_w),
                int((cy - h / 2) * ori_h),
                int((cx + w / 2) * ori_w),
                int((cy + h / 2) * ori_h),
            ])
    return bboxes


def convert_imgset(img_set_type, dir_coco):
    """Convert one split of YOLO-style labels (jpg + txt pairs) to VOC layout.

    For every ``*.jpg`` in ``dir_coco`` that has at least one labelled box, the
    image is copied to ``JPEGImages``, its stem is appended to
    ``ImageSets/Main/<img_set_type>.txt`` and an XML file is written to
    ``Annotations`` (all below ``output_dir_voc``).  Images whose label file
    contains no box are skipped entirely so that images, list entries and XML
    files stay in one-to-one correspondence.

    :param img_set_type: split name used for the list file, e.g. train or val
    :param dir_coco: directory holding the ``*.jpg`` images and ``*.txt`` labels
    :return: None
    """
    img_paths = glob.glob(os.path.join(dir_coco, "*.jpg"))
    img_num = len(img_paths)
    txt_label_num = len(glob.glob(os.path.join(dir_coco, "*.txt")))
    assert img_num == txt_label_num, "必须一致!"
    print("数据集:{};图片数量={};标签txt文件数:{}".format(img_set_type, img_num, txt_label_num))

    jpeg_dir = os.path.join(output_dir_voc, "JPEGImages")
    annotations_dir = os.path.join(output_dir_voc, "Annotations")
    image_sets_dir = os.path.join(output_dir_voc, "ImageSets", "Main")
    # Without these directories shutil.copy2 would create a *file* named
    # "JPEGImages" and the open() calls below would fail.
    for out_dir in (jpeg_dir, annotations_dir, image_sets_dir):
        os.makedirs(out_dir, exist_ok=True)

    output_set_txt = os.path.join(image_sets_dir, "%s.txt" % img_set_type)
    # The context manager closes the list file even if a conversion step raises.
    with open(output_set_txt, "w") as output_set_txt_writer:
        for img_path in img_paths:
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals an unreadable/corrupt file with None.
                print("无法读取图片,跳过:{}".format(img_path))
                continue
            ori_h, ori_w = img.shape[:2]
            # Derive the stem from the extension only; a plain string replace
            # of "jpg" would also rewrite directory names containing "jpg".
            stem = os.path.splitext(img_path)[0]
            img_name = os.path.basename(stem)
            txt_path = stem + ".txt"
            print("图片的宽={},高={},name={}, txt_path={}".format(ori_w, ori_h, img_name, txt_path))

            bboxes = _read_yolo_bboxes(txt_path, ori_w, ori_h)
            if not bboxes:
                print('txt的标注文件 没有任何目标类')
                continue

            # Copy/list the image only once we know an XML will be written.
            shutil.copy2(img_path, jpeg_dir)
            output_set_txt_writer.write("%s\n" % img_name)
            print("该图片={}的所有box标注:{}".format(img_name, bboxes))
            annotation_xml_path = os.path.join(annotations_dir, "%s.xml" % img_name)
            writexml(img_path, (ori_h, ori_w), bboxes, annotation_xml_path)
if __name__ == "__main__":
    # Only the training split is converted; use ("val", val_dir_coco) instead
    # to convert the validation split.
    split_name, split_dir = "train", train_dir_coco
    convert_imgset(split_name, split_dir)
复制代码
# -*- coding: utf-8 -*-
from __future__ import division, print_function
__author__ = u'江锦炬 微信:dfy_88888'
__date__ = '2020/11/3 下午4:04'
__product__ = 'PyCharm'
__filename__ = 'dfy_voc_2_yolo_format'
import cv2
import os
import shutil
import glob
# import xml.etree.ElementTree as ET
from xml.dom.minidom import parse
import numpy as np
import time
import math
import logging as log
import glob
from PIL import ImageFont
from PIL import Image
from PIL import ImageDraw
# Font used by drawRectBox for the label text; loaded once when the script starts.
fontC = ImageFont.truetype("./Font/platech.ttf", 16, 0)
# Draw the bounding box and its label.
def drawRectBox(image, rect, addText, color=(0, 0, 255)):
    """Draw a box outline plus a filled caption strip and return the result.

    :param image: numpy image array handed to ``cv2.rectangle``.
    :param rect: indexable ``[x1, y1, x2, y2]`` box corners.
    :param addText: caption text rendered with the module-level ``fontC``.
    :param color: colour used for the outline and the caption background.
    :return: a new numpy array built from the annotated image.
    """
    left, top = rect[0], rect[1]

    # Outline of the box itself, 2 px thick.
    box_top_left = (int(left), int(top))
    box_bottom_right = (int(rect[2]), int(rect[3]))
    cv2.rectangle(image, box_top_left, box_bottom_right, color, 2, cv2.LINE_AA)

    # Caption background: an 18 px high, ~115 px wide strip above the box;
    # thickness -1 asks OpenCV for a filled rectangle.
    strip_top_left = (int(left - 1), int(top) - 18)
    strip_bottom_right = (int(left + 115), int(top))
    cv2.rectangle(image, strip_top_left, strip_bottom_right, color, -1, cv2.LINE_AA)

    # Render the caption through PIL in white.
    canvas = Image.fromarray(image)
    text_origin = (int(left + 1), int(top - 16))
    ImageDraw.Draw(canvas).text(text_origin, addText, (255, 255, 255), font=fontC)
    return np.array(canvas)
voc_imgs_dir = "/media/jiang/AI_DataSets/DataSets/person_vehicle_voc/JPEGImages"
voc_annotations = "/media/jiang/AI_DataSets/DataSets/person_vehicle_voc/Annotations"
output_dir = "/media/jiang/AI_DataSets/DataSets/person_vehicle_voc/temp"
# The results need to be checked image by image.
# YOLO format: .jpg .txt train.txt val.txt
label_dict = {"0": "person", "1": "car", "2": "motorbike", "3": "bus", "4": "truck"}
# Reverse table so an XML class name maps straight to its YOLO class id.
name_to_class_id = {class_name: class_id for class_id, class_name in label_dict.items()}


def voc_box_to_yolo(x1, y1, x2, y2, img_w, img_h):
    """Convert pixel box corners to normalised YOLO ``(cx, cy, w, h)``.

    Each value is clamped at 0 from below, so an inverted box yields zero
    width/height instead of a negative number.
    """
    box_width = max(0, (x2 - x1) * 1.0 / img_w)
    box_height = max(0, (y2 - y1) * 1.0 / img_h)
    box_cx = max(0, (x2 + x1) / (img_w * 2.0))
    box_cy = max(0, (y2 + y1) / (img_h * 2.0))
    return box_cx, box_cy, box_width, box_height


voc_img_paths = glob.glob(os.path.join(voc_imgs_dir, "*.jpg"))
total_img_num = len(voc_img_paths)
print("共有多少张jpg图片:", total_img_num)
total_xml_anno_num = len(glob.glob(os.path.join(voc_annotations, "*.xml")))
print("共有多少个xml的标注:", total_xml_anno_num)
assert total_img_num == total_xml_anno_num, "图片和xml标注数量必须一致!"
# Counts recorded by the author from an earlier run: 2059 jpg images, 2059 xml files.

# shutil.copy2 into a missing directory would create a *file* called "temp",
# so make sure the target exists -- but only when there is work to do.
if voc_img_paths and not os.path.isdir(output_dir):
    os.makedirs(output_dir)

for img_path in voc_img_paths:
    ori_img = cv2.imread(img_path)
    if ori_img is None:
        # cv2.imread signals an unreadable/corrupt file with None.
        print("无法读取图片,跳过:", img_path)
        continue
    ori_h, ori_w = ori_img.shape[:2]
    # splitext keeps file names that contain extra dots intact.
    ori_img_name = os.path.splitext(os.path.basename(img_path))[0]
    xml_annotation_path = os.path.join(voc_annotations, "%s.xml" % ori_img_name)
    if not os.path.isfile(xml_annotation_path):
        print("找不到xml标注啊:", xml_annotation_path)
        continue

    # Parse the annotation and collect every <object> element.
    objs = parse(xml_annotation_path).documentElement.getElementsByTagName('object')
    shutil.copy2(img_path, output_dir)
    # One output line per object: "<class_id> <x_center> <y_center> <width> <height>".
    # The context manager closes the label file even if an object fails to parse.
    with open(os.path.join(output_dir, "%s.txt" % ori_img_name), "w") as new_text_file_writer:
        for obj in objs:
            name = obj.getElementsByTagName('name')[0].childNodes[0].nodeValue.strip()
            class_id = name_to_class_id.get(name)
            if class_id is None:
                # Classes outside label_dict are still left out of the txt
                # file, but are now reported instead of vanishing silently.
                print("未知类别,已跳过:", name, xml_annotation_path)
                continue
            bndbox = obj.getElementsByTagName('bndbox')[0]
            x1, y1, x2, y2 = [
                int(bndbox.getElementsByTagName(tag)[0].childNodes[0].nodeValue)
                for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            ]
            box_cx, box_cy, box_width, box_height = voc_box_to_yolo(x1, y1, x2, y2, ori_w, ori_h)
            new_text_file_writer.write(
                "%s %.6f %.6f %.6f %.6f\n" % (class_id, box_cx, box_cy, box_width, box_height))
复制代码
# -*- coding: utf-8 -*-
__author__ = u'江锦炬 微信:dfy_88888'
__date__ = '2020/11/3 下午2:52'
__product__ = 'PyCharm'
__filename__ = 'dfy_demo01'
import cv2
import os
import glob
import cv2
import os
import shutil
# import xml.etree.ElementTree as ET
from xml.dom.minidom import parse
import numpy as np
import time
import math
import logging as log
import glob
from PIL import ImageFont
from PIL import Image
from PIL import ImageDraw
# Font used by drawRectBox for the label text; loaded once when the script starts.
fontC = ImageFont.truetype("./Font/platech.ttf", 16, 0)
# Draw the bounding box and its label.
def drawRectBox(image, rect, addText, color=(0, 0, 255)):
    """Annotate ``image`` with a box outline and a captioned strip above it.

    :param image: numpy image array handed to ``cv2.rectangle``.
    :param rect: indexable ``[x1, y1, x2, y2]`` box corners.
    :param addText: caption text rendered with the module-level ``fontC``.
    :param color: colour used for the outline and the caption background.
    :return: a new numpy array built from the annotated image.
    """
    x_min, y_min = rect[0], rect[1]
    white = (255, 255, 255)

    # Box outline, 2 px thick.
    cv2.rectangle(
        image,
        (int(x_min), int(y_min)),
        (int(rect[2]), int(rect[3])),
        color,
        2,
        cv2.LINE_AA,
    )
    # Filled (thickness -1) caption background: 18 px high, ~115 px wide.
    cv2.rectangle(
        image,
        (int(x_min - 1), int(y_min) - 18),
        (int(x_min + 115), int(y_min)),
        color,
        -1,
        cv2.LINE_AA,
    )

    # The caption is drawn with PIL, in white.
    pil_image = Image.fromarray(image)
    painter = ImageDraw.Draw(pil_image)
    painter.text((int(x_min + 1), int(y_min - 16)), addText, white, font=fontC)
    return np.array(pil_image)
# Visually verify YOLO-format labels (*.jpg + *.txt pairs), whether produced by
# yolo-mark or converted from another format.
input_imgs_dir = "/media/jiang/AI_DataSets/DataSets/person_vehicle_voc/temp"
# label_dict = {"0": "fire", "1": "smoke"}
label_dict = {"0": "person", "1": "car", "2": "motorbike", "3": "bus", "4": "truck"}


def yolo_to_corner_box(cx, cy, w, h, img_w, img_h):
    """Convert normalised YOLO ``cx, cy, w, h`` to pixel ``[x1, y1, x2, y2]``.

    The four box values may be numbers or numeric strings straight from a
    label line; corner coordinates are truncated to int.
    """
    cx = float(cx) * img_w
    cy = float(cy) * img_h
    w = float(w) * img_w
    h = float(h) * img_h
    return [int(cx - w / 2), int(cy - h / 2), int(cx + w / 2), int(cy + h / 2)]


input_img_paths = glob.glob(os.path.join(input_imgs_dir, "*.jpg"))
img_num = len(input_img_paths)
txt_num = len(glob.glob(os.path.join(input_imgs_dir, "*.txt")))
assert img_num == txt_num, "图片和标注文件个数必须一致!"
for idx, img_path in enumerate(input_img_paths):
    # Stop once the images at indices 0..300 have been handled.  A plain break
    # replaces the former `assert 0 == 1`, which is stripped under `python -O`.
    if idx > 300:
        break
    ori_img = cv2.imread(img_path)
    if ori_img is None:
        # cv2.imread signals an unreadable/corrupt file with None.
        print("无法读取图片,跳过:", img_path)
        continue
    ori_h, ori_w = ori_img.shape[:2]
    # Strip only the extension: splitting the whole path on "." breaks as soon
    # as a directory or file name contains a dot (e.g. "./data/x.jpg").
    stem = os.path.splitext(img_path)[0]
    txt_file_path = stem + ".txt"
    ori_img_name = os.path.basename(stem)
    with open(txt_file_path, "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Tolerate blank lines in the label file.
                continue
            class_id, cx, cy, w, h = fields
            rect_box = yolo_to_corner_box(cx, cy, w, h, ori_w, ori_h)
            ori_img = drawRectBox(ori_img, rect_box, label_dict[class_id], color=(0, 0, 255))
    print("当前正在查看的图片:", ori_img_name)
    cv2.imshow("ori", ori_img)
    # Block until a key is pressed before moving to the next image.
    cv2.waitKey(0)
复制代码
作者:
leironh
时间:
2020-12-5 21:25
感谢分享
作者:
leironh
时间:
2020-12-5 21:26
干货满满
作者:
leironh
时间:
2020-12-5 23:40
566666666666666666666
作者:
leironh
时间:
2020-12-6 09:15
Python高级编程与AI数据分析课程
作者:
leironh
时间:
2020-12-6 09:18
Python高级编程与AI数据分析课程
作者:
leironh
时间:
2020-12-6 09:19
Python高级编程与AI数据分析课程
作者:
leironh
时间:
2020-12-6 09:20
Python高级编程与AI数据分析课程
作者:
leironh
时间:
2020-12-6 09:25
笔记写的挺好的,点个赞
作者:
leironh
时间:
2020-12-6 09:29
笔记写的挺好的,点个赞
作者:
leironh
时间:
2020-12-6 09:36
目标检测之coco与voc格式的数据相互转换,并验证coco格式的脚本目标检测之coco与voc格式的数据相互转换,并验证coco格式的脚本
作者:
小别离
时间:
2021-1-12 16:47
感觉编码的好牛厉害
作者:
小别离
时间:
2021-1-12 16:47
我一个学材料的生生的要去学计算机
作者:
小别离
时间:
2021-1-12 16:48
还是得从python基础来学啊,好多东西看不懂呢
作者:
小别离
时间:
2021-1-12 16:48
kuaile 666666666666666666666666666666666666666666
作者:
小别离
时间:
2021-1-12 16:49
快晋级到终极会员了
作者:
小别离
时间:
2021-1-12 16:49
材料为什么要和机器学习联系到一起呢
作者:
小别离
时间:
2021-1-12 16:49
我都不知道我在学什么
作者:
小别离
时间:
2021-1-12 16:50
虽然可以直接调包使用,但是有很多代码看不懂呢
作者:
小别离
时间:
2021-1-12 16:50
慢慢来吧。感觉这个年有得忙了
作者:
小别离
时间:
2021-1-12 16:51
换一个主题吧
欢迎光临 东方耀AI技术分享 (http://www.ai111.vip/)
Powered by Discuz! X3.4