# 目标检测之coco与voc格式的数据相互转换，并验证coco格式的... - 东方耀AI技术分享

# -*- coding: utf-8 -*-
__author__ = u'江锦炬微信：dfy_88888'
__date__ = '2020/12/1 上午11:27'
__product__ = 'PyCharm'
__filename__ = '08_coco_to_voc'
# import os, cv2, sys, shutil
import cv2
import shutil
import glob
import os
from xml.dom.minidom import Document
# Convert object-detection datasets in COCO/YOLO format to VOC format, which is
# convenient for k-means clustering of anchors.
# Input directories: each one is scanned for *.jpg images and *.txt label files.
train_dir_coco = "/media/jiang/AI_DataSets/DataSets/person_vehicle_coco/person_vehicle_yolo_train"
val_dir_coco = "/media/jiang/AI_DataSets/DataSets/person_vehicle_coco/person_vehicle_yolo_val"
# Maps the class-id string read from a label line to the class name written to the XML.
label_dict = {"0": "person", "1": "car", "2": "motorbike", "3": "bus", "4": "truck"}
# Root of the generated VOC dataset (ImageSets/Main, JPEGImages and Annotations live below it).
output_dir_voc = "/media/jiang/AI_DataSets/DataSets/person_vehicle_voc"
def _append_text_element(doc, parent, tag, text):
    """Create ``<tag>text</tag>``, append it to *parent* and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(str(text)))
    parent.appendChild(element)
    return element


def writexml(img_path, ori_hw, bboxes, xmlpath):
    """
    Write a PASCAL VOC style annotation XML file for a single image.

    :param img_path: path (or bare file name) of the image; only its base name
        is stored in ``<filename>``
    :param ori_hw: ``(height, width)`` of the image in pixels
    :param bboxes: iterable of ``[class_name, xmin, ymin, xmax, ymax]`` entries,
        e.g. ``['person', 412, 157, 465, 295]``
    :param xmlpath: path of the XML file to create (overwritten if it exists)
    :return: None
    """
    doc = Document()
    # Root node
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    # NOTE: <folder> is the name of the VOC dataset directory.
    _append_text_element(doc, annotation, 'folder', 'person_vehicle_voc')
    # VOC stores the image file name here, not a machine-specific absolute path.
    _append_text_element(doc, annotation, 'filename', os.path.basename(img_path))

    source = doc.createElement('source')
    annotation.appendChild(source)
    _append_text_element(doc, source, 'database', 'coco Database')
    _append_text_element(doc, source, 'annotation', 'jjj VOC2020')
    _append_text_element(doc, source, 'image', 'flickr')
    _append_text_element(doc, source, 'flickrid', '-1')

    owner = doc.createElement('owner')
    annotation.appendChild(owner)
    _append_text_element(doc, owner, 'flickrid', 'jjj_88888')
    _append_text_element(doc, owner, 'name', 'jjj_88888')

    size = doc.createElement('size')
    annotation.appendChild(size)
    _append_text_element(doc, size, 'width', ori_hw[1])
    _append_text_element(doc, size, 'height', ori_hw[0])
    _append_text_element(doc, size, 'depth', '3')

    _append_text_element(doc, annotation, 'segmented', '0')

    for bbox in bboxes:
        obj = doc.createElement('object')
        annotation.appendChild(obj)
        _append_text_element(doc, obj, 'name', bbox[0])
        _append_text_element(doc, obj, 'pose', 'Unspecified')
        _append_text_element(doc, obj, 'truncated', '1')
        _append_text_element(doc, obj, 'difficult', '0')

        bndbox = doc.createElement('bndbox')
        obj.appendChild(bndbox)
        # (xmin, ymin) is the top-left corner of the box, (xmax, ymax) the bottom-right one.
        _append_text_element(doc, bndbox, 'xmin', bbox[1])
        _append_text_element(doc, bndbox, 'ymin', bbox[2])
        _append_text_element(doc, bndbox, 'xmax', bbox[3])
        _append_text_element(doc, bndbox, 'ymax', bbox[4])

    # The XML declaration emitted by toprettyxml() names no encoding, so readers
    # assume UTF-8; write UTF-8 explicitly instead of the locale default.
    with open(xmlpath, 'w', encoding='utf-8') as f:
        f.write(doc.toprettyxml(indent=''))
def convert_imgset(img_set_type, dir_coco):
    """
    Convert one split of a YOLO-format dataset to the VOC directory layout.

    Every ``*.jpg`` in ``dir_coco`` is expected to have a label file with the
    same base name and a ``.txt`` extension whose lines look like
    ``class_id cx cy w h`` (box centre and size, normalised to the image size).
    For each image that has at least one box, the image is copied to
    ``JPEGImages``, an annotation XML is written to ``Annotations`` and the
    image name is appended to ``ImageSets/Main/<img_set_type>.txt`` below
    ``output_dir_voc``. Images without boxes or that cannot be decoded are
    skipped entirely, so images, XML files and the list stay in sync.

    :param img_set_type: train or val
    :param dir_coco: directory holding the ``*.jpg`` images and ``*.txt`` labels
    :return: None
    """
    # Sorted so the generated image-set list is deterministic.
    img_paths = sorted(glob.glob(os.path.join(dir_coco, "*.jpg")))
    img_num = len(img_paths)
    txt_label_num = len(glob.glob(os.path.join(dir_coco, "*.txt")))
    assert img_num == txt_label_num, "必须一致！"
    print("数据集：{};图片数量={};标签txt文件数：{}".format(img_set_type, img_num, txt_label_num))

    jpeg_dir = os.path.join(output_dir_voc, "JPEGImages")
    annotations_dir = os.path.join(output_dir_voc, "Annotations")
    image_sets_dir = os.path.join(output_dir_voc, "ImageSets", "Main")
    # Without these directories shutil.copy2 would create a *file* named
    # "JPEGImages" and the open() calls below would fail.
    for out_dir in (jpeg_dir, annotations_dir, image_sets_dir):
        os.makedirs(out_dir, exist_ok=True)

    output_set_txt = os.path.join(image_sets_dir, "%s.txt" % img_set_type)
    with open(output_set_txt, "w") as output_set_txt_writer:
        for img_path in img_paths:
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread returns None instead of raising for unreadable files.
                print("无法读取图片，已跳过：{}".format(img_path))
                continue
            ori_h, ori_w = img.shape[:2]
            # splitext only touches the extension; names containing dots and
            # directories containing "jpg" are left intact.
            img_name = os.path.splitext(os.path.basename(img_path))[0]
            txt_path = os.path.splitext(img_path)[0] + ".txt"
            print("图片的宽={},高={},name={}, txt_path={}".format(ori_w, ori_h, img_name, txt_path))

            # Label line format: "%d %.6f %.6f %.6f %.6f\n" % (class_id, cx, cy, w, h)
            bboxes = []
            with open(txt_path, "r") as f:
                for line in f:
                    fields = line.split()
                    if not fields:
                        # Tolerate blank lines (e.g. a trailing newline).
                        continue
                    class_id, cx, cy, w, h = fields
                    cx, cy, w, h = float(cx), float(cy), float(w), float(h)
                    # Centre/size (normalised) -> corner coordinates in pixels.
                    x1_int = int((cx - w / 2) * ori_w)
                    y1_int = int((cy - h / 2) * ori_h)
                    x2_int = int((cx + w / 2) * ori_w)
                    y2_int = int((cy + h / 2) * ori_h)
                    bboxes.append([label_dict[class_id], x1_int, y1_int, x2_int, y2_int])

            if not bboxes:
                print('txt的标注文件没有任何目标类')
                continue
            print("该图片={}的所有box标注：{}".format(img_name, bboxes))

            # Copy and list the image only once it is known to get an XML file.
            shutil.copy2(img_path, jpeg_dir)
            output_set_txt_writer.write("%s\n" % img_name)
            annotation_xml_path = os.path.join(annotations_dir, "%s.xml" % img_name)
            writexml(img_path, (ori_h, ori_w), bboxes, annotation_xml_path)
if __name__ == '__main__':
    # Only the training split is converted on a run; to convert the validation
    # split call convert_imgset('val', val_dir_coco) instead.
    convert_imgset('train', train_dir_coco)

# 复制代码 (web-page "copy code" marker: the script 08_coco_to_voc ends here)

# -*- coding: utf-8 -*-
from __future__ import division, print_function
__author__ = u'江锦炬微信：dfy_88888'
__date__ = '2020/11/3 下午4:04'
__product__ = 'PyCharm'
__filename__ = 'dfy_voc_2_yolo_format'
import cv2
import os
import shutil
import glob
# import xml.etree.ElementTree as ET
from xml.dom.minidom import parse
import numpy as np
import time
import math
import logging as log
import glob
from PIL import ImageFont
from PIL import Image
from PIL import ImageDraw
# Font used by drawRectBox for the label text (size 16). The path is relative,
# so it is resolved against the current working directory when the script starts.
fontC = ImageFont.truetype("./Font/platech.ttf", 16, 0)
# Draw a bounding box together with its text label.
def drawRectBox(image, rect, addText, color=(0, 0, 255)):
    """
    Draw a box outline, a filled caption strip above it and a white text label.

    :param image: image array; cv2.rectangle draws directly onto it
    :param rect: box as ``[x1, y1, x2, y2]`` (top-left and bottom-right corners)
    :param addText: label text rendered inside the caption strip
    :param color: colour of the outline and the strip (default ``(0, 0, 255)``)
    :return: a new numpy array holding the image with box, strip and label
    """
    left, top, right, bottom = rect[0], rect[1], rect[2], rect[3]

    # Box outline, 2 px thick, anti-aliased.
    outline_start = (int(left), int(top))
    outline_end = (int(right), int(bottom))
    cv2.rectangle(image, outline_start, outline_end, color, 2, cv2.LINE_AA)

    # Caption background: 18 px high strip directly above the box; thickness -1 fills it.
    strip_start = (int(left - 1), int(top) - 18)
    strip_end = (int(left + 115), int(top))
    cv2.rectangle(image, strip_start, strip_end, color, -1, cv2.LINE_AA)

    # The text is rendered through PIL with the module-level font, in white.
    canvas = Image.fromarray(image)
    text_origin = (int(left + 1), int(top - 16))
    ImageDraw.Draw(canvas).text(text_origin, addText, (255, 255, 255), font=fontC)
    return np.array(canvas)
# Source VOC dataset (images and XML annotations) and the directory that
# receives the YOLO-format copy: every image plus a same-named .txt label file.
voc_imgs_dir = "/media/jiang/AI_DataSets/DataSets/person_vehicle_voc/JPEGImages"
voc_annotations = "/media/jiang/AI_DataSets/DataSets/person_vehicle_voc/Annotations"
output_dir = "/media/jiang/AI_DataSets/DataSets/person_vehicle_voc/temp"
# The converted data still has to be checked image by image.
# YOLO format: .jpg .txt train.txt val.txt
label_dict = {"0": "person", "1": "car", "2": "motorbike", "3": "bus", "4": "truck"}
# Reverse lookup (class name -> class-id string) used when writing label lines.
name_to_class_id = {class_name: class_id for class_id, class_name in label_dict.items()}


def _bndbox_int(obj, tag):
    """Return coordinate *tag* (``xmin``/``ymin``/``xmax``/``ymax``) of an ``<object>`` node as int."""
    bndbox = obj.getElementsByTagName('bndbox')[0]
    # Go through float() so that values such as "12.0" are accepted as well.
    return int(float(bndbox.getElementsByTagName(tag)[0].childNodes[0].nodeValue))


voc_img_paths = glob.glob(os.path.join(voc_imgs_dir, "*.jpg"))
total_img_num = len(voc_img_paths)
print("共有多少张jpg图片：", total_img_num)
total_xml_anno_num = len(glob.glob(os.path.join(voc_annotations, "*.xml")))
print("共有多少个xml的标注：", total_xml_anno_num)
assert total_img_num == total_xml_anno_num, "图片和xml标注数量必须一致！"

# Without the directory shutil.copy2 would create a *file* named "temp".
os.makedirs(output_dir, exist_ok=True)

for idx, img_path in enumerate(voc_img_paths):
    ori_img = cv2.imread(img_path)
    if ori_img is None:
        # cv2.imread returns None instead of raising for unreadable files.
        print("无法读取图片，已跳过：", img_path)
        continue
    ori_h, ori_w = ori_img.shape[:2]
    # splitext keeps dots that are part of the image name.
    ori_img_name = os.path.splitext(os.path.basename(img_path))[0]
    xml_annotation_path = os.path.join(voc_annotations, "%s.xml" % ori_img_name)
    if not os.path.isfile(xml_annotation_path):
        print("找不到xml标注啊：", xml_annotation_path)
        continue

    # Parse the annotation and collect its <object> nodes.
    objs = parse(xml_annotation_path).documentElement.getElementsByTagName('object')
    shutil.copy2(img_path, output_dir)

    # One line per object: <class_id> <x_center> <y_center> <width> <height>,
    # e.g. "0 0.622266 0.552778 0.289844 0.483333"
    with open(os.path.join(output_dir, "%s.txt" % ori_img_name), "w") as new_text_file_writer:
        for obj in objs:
            name = obj.getElementsByTagName('name')[0].childNodes[0].nodeValue
            class_id = name_to_class_id.get(name)
            if class_id is None:
                # Objects of unknown classes are still skipped, but no longer silently.
                print("未知类别，已跳过：", name, xml_annotation_path)
                continue
            x1, y1, x2, y2 = [_bndbox_int(obj, tag) for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
            # Normalise by the image size and clamp to the valid [0, 1] range.
            box_width = min(1.0, max(0.0, (x2 - x1) * 1.0 / ori_w))
            box_height = min(1.0, max(0.0, (y2 - y1) * 1.0 / ori_h))
            box_cx = min(1.0, max(0.0, (x2 + x1) / (ori_w * 2.0)))
            box_cy = min(1.0, max(0.0, (y2 + y1) / (ori_h * 2.0)))
            new_text_file_writer.write(
                "%s %.6f %.6f %.6f %.6f\n" % (class_id, box_cx, box_cy, box_width, box_height))

# 复制代码 (web-page "copy code" marker: the script dfy_voc_2_yolo_format ends here)

# -*- coding: utf-8 -*-
__author__ = u'江锦炬微信：dfy_88888'
__date__ = '2020/11/3 下午2:52'
__product__ = 'PyCharm'
__filename__ = 'dfy_demo01'
import cv2
import os
import glob
import cv2
import os
import shutil
# import xml.etree.ElementTree as ET
from xml.dom.minidom import parse
import numpy as np
import time
import math
import logging as log
import glob
from PIL import ImageFont
from PIL import Image
from PIL import ImageDraw
# Font used by drawRectBox for the label text (size 16). The path is relative,
# so it is resolved against the current working directory when the script starts.
fontC = ImageFont.truetype("./Font/platech.ttf", 16, 0)
# Draw a bounding box together with its text label.
def drawRectBox(image, rect, addText, color=(0, 0, 255)):
    """
    Annotate ``image`` with a rectangle, a filled caption strip and ``addText``.

    :param image: image array; the two rectangles are drawn directly onto it
    :param rect: ``[x1, y1, x2, y2]`` corners of the box
    :param addText: label text placed in the strip above the box
    :param color: colour used for the outline and the strip
    :return: numpy array converted back from the PIL image that carries the text
    """
    x_min, y_min = rect[0], rect[1]
    x_max, y_max = rect[2], rect[3]

    # Outline of the box: thickness 2, anti-aliased lines.
    cv2.rectangle(
        image,
        (int(x_min), int(y_min)),
        (int(x_max), int(y_max)),
        color,
        2,
        cv2.LINE_AA,
    )
    # Text background: height 18, reaching from x-1 to x+115; -1 means "filled".
    cv2.rectangle(
        image,
        (int(x_min - 1), int(y_min) - 18),
        (int(x_min + 115), int(y_min)),
        color,
        -1,
        cv2.LINE_AA,
    )

    # Switch to PIL for the text so the TrueType font in fontC can be used.
    pil_image = Image.fromarray(image)
    painter = ImageDraw.Draw(pil_image)
    white = (255, 255, 255)
    painter.text((int(x_min + 1), int(y_min - 16)), addText, white, font=fontC)
    return np.array(pil_image)
# Visually verify YOLO-format annotations (*.jpg with same-named *.txt), whether
# they were labelled with yolo-mark or converted from another format.
input_imgs_dir = "/media/jiang/AI_DataSets/DataSets/person_vehicle_voc/temp"
# label_dict = {"0": "fire", "1": "smoke"}
label_dict = {"0": "person", "1": "car", "2": "motorbike", "3": "bus", "4": "truck"}
preview_img_paths = glob.glob(os.path.join(input_imgs_dir, "*.jpg"))
img_num = len(preview_img_paths)
txt_num = len(glob.glob(os.path.join(input_imgs_dir, "*.txt")))
assert img_num == txt_num, "图片和标注文件个数必须一致！"

for idx, img_path in enumerate(preview_img_paths):
    ori_img = cv2.imread(img_path)
    if ori_img is None:
        # cv2.imread returns None instead of raising for unreadable files.
        print("无法读取图片，已跳过：", img_path)
    else:
        ori_h, ori_w = ori_img.shape[:2]
        # splitext only strips the extension, so directories that contain a
        # dot (e.g. "./data") no longer truncate the path.
        txt_file_path = os.path.splitext(img_path)[0] + ".txt"
        ori_img_name = os.path.splitext(os.path.basename(img_path))[0]
        with open(txt_file_path, "r") as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines in the label file.
                    continue
                class_id, cx, cy, w, h = fields
                # Normalised centre/size -> pixel corner coordinates.
                cx = float(cx) * ori_w
                cy = float(cy) * ori_h
                w = float(w) * ori_w
                h = float(h) * ori_h
                rect_box = [int(cx - w / 2), int(cy - h / 2), int(cx + w / 2), int(cy + h / 2)]
                ori_img = drawRectBox(ori_img, rect_box, label_dict[class_id], color=(0, 0, 255))
        print("当前正在查看的图片：", ori_img_name)
        cv2.imshow("ori", ori_img)
        # Block until a key is pressed before moving on to the next image.
        cv2.waitKey(0)
    if idx >= 300:
        # Stop after the first 301 images. The former `assert 0 == 1` is
        # stripped under `python -O` and would then never stop the loop.
        break

# 复制代码 (web-page "copy code" marker: the script dfy_demo01 ends here)