|
目标检测:YOLO txt 标注(原文称 coco 格式)与 VOC xml 标注的相互转换,以及验证 YOLO 格式标注的脚本
- # -*- coding: utf-8 -*-
- __author__ = u'江锦炬 微信:dfy_88888'
- __date__ = '2020/12/1 上午11:27'
- __product__ = 'PyCharm'
- __filename__ = '08_coco_to_voc'
- # import os, cv2, sys, shutil
- import cv2
- import shutil
- import glob
- import os
- from xml.dom.minidom import Document
# Convert object-detection datasets in coco / yolo format to VOC format, which
# makes it convenient to run k-means clustering for anchors.
# Source directories; each is expected to hold *.jpg images and *.txt labels.
train_dir_coco = "/media/jiang/AI_DataSets/DataSets/person_vehicle_coco/person_vehicle_yolo_train"
val_dir_coco = "/media/jiang/AI_DataSets/DataSets/person_vehicle_coco/person_vehicle_yolo_val"
# Maps the class id read from a label file (kept as a string) to its class name.
label_dict = {"0": "person", "1": "car", "2": "motorbike", "3": "bus", "4": "truck"}
# Root directory of the VOC dataset that the conversion writes into.
output_dir_voc = "/media/jiang/AI_DataSets/DataSets/person_vehicle_voc"
def _append_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to *parent* and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element


def writexml(img_path, ori_hw, bboxes, xmlpath):
    """Write one Pascal VOC style annotation XML file.

    :param img_path: image path or file name; only its base name is stored in
        the ``<filename>`` element.
    :param ori_hw: ``(height, width)`` of the image in pixels.
    :param bboxes: iterable of ``[class_name, xmin, ymin, xmax, ymax]`` entries,
        e.g. ``['person', 412, 157, 465, 295]``. May be empty.
    :param xmlpath: path of the XML file to create (overwritten if it exists).
    :return: None
    """
    doc = Document()
    # Root node.
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    # Folder name of the VOC dataset.
    _append_text_element(doc, annotation, 'folder', 'person_vehicle_voc')
    # VOC stores the bare image file name here, not the source path.
    _append_text_element(doc, annotation, 'filename', os.path.basename(img_path))

    source = doc.createElement('source')
    annotation.appendChild(source)
    _append_text_element(doc, source, 'database', 'coco Database')
    _append_text_element(doc, source, 'annotation', 'jjj VOC2020')
    _append_text_element(doc, source, 'image', 'flickr')
    _append_text_element(doc, source, 'flickrid', '-1')

    owner = doc.createElement('owner')
    annotation.appendChild(owner)
    _append_text_element(doc, owner, 'flickrid', 'jjj_88888')
    _append_text_element(doc, owner, 'name', 'jjj_88888')

    size = doc.createElement('size')
    annotation.appendChild(size)
    _append_text_element(doc, size, 'width', str(ori_hw[1]))
    _append_text_element(doc, size, 'height', str(ori_hw[0]))
    # Depth is always written as 3, whatever the actual image has.
    _append_text_element(doc, size, 'depth', "3")

    _append_text_element(doc, annotation, 'segmented', '0')

    for bbox in bboxes:
        obj = doc.createElement('object')
        annotation.appendChild(obj)
        _append_text_element(doc, obj, 'name', bbox[0])
        _append_text_element(doc, obj, 'pose', 'Unspecified')
        # truncated / difficult are fixed values, not derived from the box.
        _append_text_element(doc, obj, 'truncated', '1')
        _append_text_element(doc, obj, 'difficult', '0')

        bndbox = doc.createElement('bndbox')
        obj.appendChild(bndbox)
        # (xmin, ymin) is the top-left corner, (xmax, ymax) the bottom-right.
        _append_text_element(doc, bndbox, 'xmin', str(bbox[1]))
        _append_text_element(doc, bndbox, 'ymin', str(bbox[2]))
        _append_text_element(doc, bndbox, 'xmax', str(bbox[3]))
        _append_text_element(doc, bndbox, 'ymax', str(bbox[4]))

    # Write UTF-8 bytes with a matching XML declaration so the result does not
    # depend on the platform's default text encoding.
    with open(xmlpath, 'wb') as f:
        f.write(doc.toprettyxml(indent='', encoding='utf-8'))
def _ensure_dir(path):
    """Create *path* (including missing parents) unless it already exists."""
    if not os.path.isdir(path):
        os.makedirs(path)


def _clamp(value, upper):
    """Limit *value* to the closed range ``[0, upper]``."""
    return min(max(value, 0), upper)


def _read_yolo_bboxes(txt_path, ori_w, ori_h):
    """Read a label txt file and return VOC-style pixel boxes.

    Each non-blank line is ``class_id cx cy w h`` where the four numbers are
    fractions of the image size; anything after the fifth field is ignored.

    :return: list of ``[class_name, xmin, ymin, xmax, ymax]`` with the
        coordinates clamped to the image.
    """
    bboxes = []
    with open(txt_path, "r") as f:
        for line in f:
            # split() without an argument tolerates tabs and repeated spaces.
            fields = line.split()
            if not fields:
                continue
            class_id, cx, cy, w, h = fields[:5]
            cx, cy, w, h = float(cx), float(cy), float(w), float(h)
            x1_int = _clamp(int((cx - w / 2) * ori_w), ori_w)
            y1_int = _clamp(int((cy - h / 2) * ori_h), ori_h)
            x2_int = _clamp(int((cx + w / 2) * ori_w), ori_w)
            y2_int = _clamp(int((cy + h / 2) * ori_h), ori_h)
            bboxes.append([label_dict[class_id], x1_int, y1_int, x2_int, y2_int])
    return bboxes


def convert_imgset(img_set_type, dir_coco):
    """Convert one image set with txt labels into the VOC layout.

    For every ``*.jpg`` in *dir_coco* that has at least one labelled box, the
    image is copied to ``JPEGImages``, an XML file is written to
    ``Annotations`` and the image name is appended to
    ``ImageSets/Main/<img_set_type>.txt`` (all below ``output_dir_voc``).
    Images that cannot be read, have no label file, or have no boxes are
    skipped entirely so that images, annotations and the list stay in sync.

    :param img_set_type: name of the set, e.g. ``train`` or ``val``.
    :param dir_coco: directory holding the ``*.jpg`` images and their
        same-named ``*.txt`` label files.
    :return: None
    """
    img_paths = sorted(glob.glob(os.path.join(dir_coco, "*.jpg")))
    img_num = len(img_paths)
    txt_label_num = len(glob.glob(os.path.join(dir_coco, "*.txt")))
    assert img_num == txt_label_num, "必须一致!"
    print("数据集:{};图片数量={};标签txt文件数:{}".format(img_set_type, img_num, txt_label_num))

    jpeg_dir = os.path.join(output_dir_voc, "JPEGImages")
    annotation_dir = os.path.join(output_dir_voc, "Annotations")
    image_set_dir = os.path.join(output_dir_voc, "ImageSets", "Main")
    # Without an existing JPEGImages directory, copy2 would create a *file*
    # with that name instead of copying into it.
    for directory in (jpeg_dir, annotation_dir, image_set_dir):
        _ensure_dir(directory)

    output_set_txt = os.path.join(image_set_dir, "%s.txt" % img_set_type)
    with open(output_set_txt, "w") as output_set_txt_writer:
        for img_path in img_paths:
            img = cv2.imread(img_path)
            if img is None:
                # imread returns None instead of raising on unreadable files.
                print("无法读取图片,跳过:{}".format(img_path))
                continue
            ori_h, ori_w = img.shape[:2]

            # splitext only touches the extension, unlike replace("jpg", "txt").
            img_stem = os.path.splitext(img_path)[0]
            img_name = os.path.basename(img_stem)
            txt_path = img_stem + ".txt"
            print("图片的宽={},高={},name={}, txt_path={}".format(ori_w, ori_h, img_name, txt_path))
            if not os.path.isfile(txt_path):
                print("找不到txt标注,跳过:{}".format(txt_path))
                continue

            bboxes = _read_yolo_bboxes(txt_path, ori_w, ori_h)
            if not bboxes:
                print('txt的标注文件 没有任何目标类')
                continue
            print("该图片={}的所有box标注:{}".format(img_name, bboxes))

            # Copy and list the image only once it is known to get an XML.
            shutil.copy2(img_path, jpeg_dir)
            output_set_txt_writer.write("%s\n" % img_name)
            annotation_xml_path = os.path.join(annotation_dir, "%s.xml" % img_name)
            writexml(os.path.basename(img_path), (ori_h, ori_w), bboxes, annotation_xml_path)
if __name__ == '__main__':
    # Only the training set is converted; enable the line below for validation.
    # convert_imgset('val', val_dir_coco)
    convert_imgset('train', train_dir_coco)
复制代码
- # -*- coding: utf-8 -*-
- from __future__ import division, print_function
- __author__ = u'江锦炬 微信:dfy_88888'
- __date__ = '2020/11/3 下午4:04'
- __product__ = 'PyCharm'
- __filename__ = 'dfy_voc_2_yolo_format'
- import cv2
- import os
- import shutil
- import glob
- # import xml.etree.ElementTree as ET
- from xml.dom.minidom import parse
- import numpy as np
- import time
- import math
- import logging as log
- import glob
- from PIL import ImageFont
- from PIL import Image
- from PIL import ImageDraw
# Font for the label text. The path is relative to the current working
# directory, so fall back to Pillow's built-in font when it cannot be read
# rather than failing at import time.
try:
    fontC = ImageFont.truetype("./Font/platech.ttf", 16, 0)
except (IOError, OSError):
    print("无法加载字体 ./Font/platech.ttf,改用默认字体")
    fontC = ImageFont.load_default()


# Draw the bounding box and its label.
def drawRectBox(image, rect, addText, color=(0, 0, 255)):
    """Draw a box with a filled text label and return the annotated image.

    :param image: image array as used by OpenCV; both rectangles are drawn
        onto it in place.
    :param rect: ``[x1, y1, x2, y2]`` box corners in pixels.
    :param addText: label text.
    :param color: colour of the box outline and the label background.
    :return: a new numpy array holding the box, the label background and
        the white label text.
    """
    x1, y1, x2, y2 = int(rect[0]), int(rect[1]), int(rect[2]), int(rect[3])
    cv2.rectangle(image, (x1, y1), (x2, y2), color, 2, cv2.LINE_AA)

    # The label strip is 18 px high and 116 px wide and normally sits on top of
    # the box; keep it inside the image when the box touches the top edge.
    label_top = max(y1 - 18, 0)
    # Thickness -1 asks OpenCV for a filled rectangle.
    cv2.rectangle(image, (x1 - 1, label_top), (x1 + 115, label_top + 18), color, -1,
                  cv2.LINE_AA)

    # Text is rendered with Pillow so the TrueType font can be used.
    img = Image.fromarray(image)
    draw = ImageDraw.Draw(img)
    # White text.
    draw.text((x1 + 1, label_top + 2), addText, (255, 255, 255), font=fontC)
    return np.array(img)
voc_imgs_dir = "/media/jiang/AI_DataSets/DataSets/person_vehicle_voc/JPEGImages"
voc_annotations = "/media/jiang/AI_DataSets/DataSets/person_vehicle_voc/Annotations"
output_dir = "/media/jiang/AI_DataSets/DataSets/person_vehicle_voc/temp"
# The results need to be checked image by image.
# yolo format: .jpg .txt train.txt val.txt
label_dict = {"0": "person", "1": "car", "2": "motorbike", "3": "bus", "4": "truck"}
# Reverse lookup (class name -> class id) used when writing the label lines.
name_to_class_id = {class_name: class_id for class_id, class_name in label_dict.items()}


def _first_text(node, tag):
    """Return the text of the first *tag* element found below *node*."""
    return node.getElementsByTagName(tag)[0].childNodes[0].nodeValue


def _clamp(value, upper):
    """Limit *value* to the closed range ``[0, upper]``."""
    return min(max(value, 0), upper)


voc_img_paths = sorted(glob.glob(os.path.join(voc_imgs_dir, "*.jpg")))
total_img_num = len(voc_img_paths)
print("共有多少张jpg图片:", total_img_num)
total_xml_anno_num = len(glob.glob(os.path.join(voc_annotations, "*.xml")))
print("共有多少个xml的标注:", total_xml_anno_num)
assert total_img_num == total_xml_anno_num, "图片和xml标注数量必须一致!"
# Output recorded by the author: 2059 jpg images, 2059 xml annotations.

# copy2 into a missing directory would create a file named like the directory.
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)

for img_path in voc_img_paths:
    ori_img = cv2.imread(img_path)
    if ori_img is None:
        # imread returns None instead of raising on unreadable files.
        print("无法读取图片,跳过:", img_path)
        continue
    ori_h, ori_w = ori_img.shape[:2]
    ori_img_name = os.path.splitext(os.path.basename(img_path))[0]

    xml_annotation_path = os.path.join(voc_annotations, "%s.xml" % ori_img_name)
    if not os.path.isfile(xml_annotation_path):
        print("找不到xml标注啊:", xml_annotation_path)
        continue

    # Parse the annotation and collect every <object> element.
    objs = parse(xml_annotation_path).documentElement.getElementsByTagName('object')

    shutil.copy2(img_path, output_dir)
    # One line per object: <class_id> <x_center> <y_center> <width> <height>,
    # e.g. "0 0.622266 0.552778 0.289844 0.483333".
    with open(os.path.join(output_dir, "%s.txt" % ori_img_name), "w") as new_text_file_writer:
        for obj in objs:
            name = _first_text(obj, 'name').strip()
            class_id = name_to_class_id.get(name)
            if class_id is None:
                print("未知类别,跳过:", name)
                continue

            bndbox = obj.getElementsByTagName('bndbox')[0]
            # int(float(...)) also accepts coordinates written like "12.0";
            # clamping keeps every normalised value within [0, 1].
            x1 = _clamp(int(float(_first_text(bndbox, 'xmin'))), ori_w)
            y1 = _clamp(int(float(_first_text(bndbox, 'ymin'))), ori_h)
            x2 = _clamp(int(float(_first_text(bndbox, 'xmax'))), ori_w)
            y2 = _clamp(int(float(_first_text(bndbox, 'ymax'))), ori_h)

            box_width = max(0, x2 - x1) * 1.0 / ori_w
            box_height = max(0, y2 - y1) * 1.0 / ori_h
            box_cx = (x2 + x1) / (ori_w * 2.0)
            box_cy = (y2 + y1) / (ori_h * 2.0)
            new_text_file_writer.write(
                "%s %.6f %.6f %.6f %.6f\n" % (class_id, box_cx, box_cy, box_width, box_height))
复制代码
- # -*- coding: utf-8 -*-
- __author__ = u'江锦炬 微信:dfy_88888'
- __date__ = '2020/11/3 下午2:52'
- __product__ = 'PyCharm'
- __filename__ = 'dfy_demo01'
- import cv2
- import os
- import glob
- import cv2
- import os
- import shutil
- # import xml.etree.ElementTree as ET
- from xml.dom.minidom import parse
- import numpy as np
- import time
- import math
- import logging as log
- import glob
- from PIL import ImageFont
- from PIL import Image
- from PIL import ImageDraw
# Font for the label text. The path is relative to the current working
# directory, so fall back to Pillow's built-in font when it cannot be read
# rather than failing at import time.
try:
    fontC = ImageFont.truetype("./Font/platech.ttf", 16, 0)
except (IOError, OSError):
    print("无法加载字体 ./Font/platech.ttf,改用默认字体")
    fontC = ImageFont.load_default()


# Draw the bounding box and its label.
def drawRectBox(image, rect, addText, color=(0, 0, 255)):
    """Draw a box with a filled text label and return the annotated image.

    :param image: image array as used by OpenCV; both rectangles are drawn
        onto it in place.
    :param rect: ``[x1, y1, x2, y2]`` box corners in pixels.
    :param addText: label text.
    :param color: colour of the box outline and the label background.
    :return: a new numpy array holding the box, the label background and
        the white label text.
    """
    x1, y1, x2, y2 = int(rect[0]), int(rect[1]), int(rect[2]), int(rect[3])
    cv2.rectangle(image, (x1, y1), (x2, y2), color, 2, cv2.LINE_AA)

    # The label strip is 18 px high and 116 px wide and normally sits on top of
    # the box; keep it inside the image when the box touches the top edge.
    label_top = max(y1 - 18, 0)
    # Thickness -1 asks OpenCV for a filled rectangle.
    cv2.rectangle(image, (x1 - 1, label_top), (x1 + 115, label_top + 18), color, -1,
                  cv2.LINE_AA)

    # Text is rendered with Pillow so the TrueType font can be used.
    img = Image.fromarray(image)
    draw = ImageDraw.Draw(img)
    # White text.
    draw.text((x1 + 1, label_top + 2), addText, (255, 255, 255), font=fontC)
    return np.array(img)
# Visually verify yolo-format labels (*.jpg with *.txt), whether they were
# produced with yolo-mark or converted from another format.
input_imgs_dir = "/media/jiang/AI_DataSets/DataSets/person_vehicle_voc/temp"
# label_dict = {"0": "fire", "1": "smoke"}
label_dict = {"0": "person", "1": "car", "2": "motorbike", "3": "bus", "4": "truck"}
# Zero-based index of the last image to show before the preview stops.
max_preview_idx = 300

preview_img_paths = glob.glob(os.path.join(input_imgs_dir, "*.jpg"))
img_num = len(preview_img_paths)
txt_num = len(glob.glob(os.path.join(input_imgs_dir, "*.txt")))
assert img_num == txt_num, "图片和标注文件个数必须一致!"

for idx, img_path in enumerate(preview_img_paths):
    ori_img = cv2.imread(img_path)
    if ori_img is None:
        # imread returns None instead of raising on unreadable files.
        print("无法读取图片,跳过:{}".format(img_path))
        continue
    ori_h, ori_w = ori_img.shape[:2]

    # splitext only strips the extension; splitting the whole path on "."
    # breaks as soon as a directory name contains a dot.
    img_stem = os.path.splitext(img_path)[0]
    txt_file_path = img_stem + ".txt"
    ori_img_name = os.path.basename(img_stem)
    if not os.path.isfile(txt_file_path):
        print("找不到txt标注,跳过:{}".format(txt_file_path))
        continue

    with open(txt_file_path, "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines
            class_id = fields[0]
            cx, cy, w, h = (float(value) for value in fields[1:5])
            # Scale the normalised centre / size back to pixels.
            cx *= ori_w
            cy *= ori_h
            w *= ori_w
            h *= ori_h
            box = [int(cx - w / 2), int(cy - h / 2), int(cx + w / 2), int(cy + h / 2)]
            # Show the raw id when the class is unknown, so bad labels can be
            # seen instead of stopping the preview with a KeyError.
            label = label_dict.get(class_id, class_id)
            ori_img = drawRectBox(ori_img, box, label, color=(0, 0, 255))

    print("当前正在查看的图片:", ori_img_name)
    cv2.imshow("ori", ori_img)
    # Block until a key is pressed before moving to the next image.
    cv2.waitKey(0)
    if idx >= max_preview_idx:
        # Stop cleanly; an assert used for this would be stripped under -O.
        break

cv2.destroyAllWindows()
复制代码
|
|