python的widerface生成voc格式的数据，生成xml文件的标注 - 东方耀AI技术分享

# -*- coding: utf-8 -*-
# Script that converts WIDER FACE ground-truth annotations into PASCAL VOC
# style XML annotation files (see writexml and convert_imgset below).
# Module metadata: author contact, creation date, IDE and script name.
__author__ = u'东方耀微信：dfy_88888'
__date__ = '2019/7/15 下午3:23'
__product__ = 'PyCharm'
__filename__ = 'widerface2voc'
# import os, cv2, sys, shutil
import os
import shutil
from xml.dom.minidom import Document

import cv2
# Root of the original WIDER FACE download. convert_imgset reads images from
# <root_dir>/WIDER_<split>/images and ground truth from
# <root_dir>/wider_face_split/wider_face_<split>_bbx_gt.txt.
root_dir = '/home/dfy888/DataSets/WIDER Face DataSet'
# Output root of the VOC-style dataset. convert_imgset writes into its
# ImageSets/Main, JPEGImages and Annotations sub-directories.
root_dir_voc = '/home/dfy888/DataSets/widerface_voc'
def _append_text_element(doc, parent, tag, text):
    """Create ``<tag>text</tag>`` under *parent* and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(str(text)))
    parent.appendChild(element)
    return element


def writexml(filename, saveimg, bboxes, xmlpath):
    """Write a PASCAL VOC style annotation XML file for one image.

    :param filename: image file name stored in the ``<filename>`` element
    :param saveimg: image object exposing ``shape`` as ``(height, width)`` or
        ``(height, width, channels)`` (e.g. the array returned by cv2)
    :param bboxes: iterable of face boxes, each ``[x, y, w, h]`` where
        ``(x, y)`` is the top-left corner and ``w``/``h`` the box size
    :param xmlpath: path of the XML file to create (overwritten if present)
    :return: None
    """
    doc = Document()
    # Root node.
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    # NOTE: 'widerface_voc' is the folder name of the VOC-format dataset.
    _append_text_element(doc, annotation, 'folder', 'widerface_voc')
    _append_text_element(doc, annotation, 'filename', filename)

    source = doc.createElement('source')
    annotation.appendChild(source)
    _append_text_element(doc, source, 'database', 'wider face Database')
    _append_text_element(doc, source, 'annotation', 'PASCAL VOC2007')
    _append_text_element(doc, source, 'image', 'flickr')
    _append_text_element(doc, source, 'flickrid', '-1')

    owner = doc.createElement('owner')
    annotation.appendChild(owner)
    _append_text_element(doc, owner, 'flickrid', 'dfy_88888')
    _append_text_element(doc, owner, 'name', 'dfy_88888')

    shape = saveimg.shape
    # A 2-D shape has no channel axis (single-channel image): report depth 1
    # instead of failing on shape[2].
    channels = shape[2] if len(shape) > 2 else 1
    size = doc.createElement('size')
    annotation.appendChild(size)
    _append_text_element(doc, size, 'width', shape[1])
    _append_text_element(doc, size, 'height', shape[0])
    _append_text_element(doc, size, 'depth', channels)

    _append_text_element(doc, annotation, 'segmented', '0')

    for bbox in bboxes:
        # bbox is a 4-vector: [top-left x, top-left y, width, height].
        obj = doc.createElement('object')
        annotation.appendChild(obj)
        # Faces are the only class in this dataset.
        _append_text_element(doc, obj, 'name', 'face')
        _append_text_element(doc, obj, 'pose', 'Unspecified')
        _append_text_element(doc, obj, 'truncated', '1')
        _append_text_element(doc, obj, 'difficult', '0')

        bndbox = doc.createElement('bndbox')
        obj.appendChild(bndbox)
        # (xmin, ymin) is the top-left corner of the box.
        _append_text_element(doc, bndbox, 'xmin', bbox[0])
        _append_text_element(doc, bndbox, 'ymin', bbox[1])
        # (xmax, ymax) is the bottom-right corner: top-left plus width/height.
        _append_text_element(doc, bndbox, 'xmax', bbox[0] + bbox[2])
        _append_text_element(doc, bndbox, 'ymax', bbox[1] + bbox[3])

    # The XML header emitted by toprettyxml() declares no encoding, so readers
    # assume UTF-8; write UTF-8 explicitly instead of the locale default.
    with open(xmlpath, 'w', encoding='utf-8') as f:
        f.write(doc.toprettyxml(indent=''))
def _read_face_boxes(gt_file, num_bbox):
    """Read *num_bbox* ground-truth lines and return ``(x, y, w, h)`` int tuples."""
    boxes = []
    for _ in range(num_bbox):
        fields = gt_file.readline().split()
        # Only the first four columns (x, y, w, h) are used; any remaining
        # columns on the line are ignored.
        x, y, w, h = (int(value) for value in fields[:4])
        boxes.append((x, y, w, h))
    return boxes


def convert_imgset(img_set_type, max_images=5, visualize=True):
    """Convert one WIDER FACE split into the VOC layout under ``root_dir_voc``.

    For every image that has at least one face box this writes the image to
    ``JPEGImages``, an XML annotation to ``Annotations`` and the image stem to
    ``ImageSets/Main/<img_set_type>.txt``.

    :param img_set_type: split name, ``'train'`` or ``'val'``
    :param max_images: stop after this many ground-truth entries have been
        parsed (entries without faces count too); ``None`` converts the
        whole split
    :param visualize: when true, show each image with its face boxes drawn
        and block until a key is pressed (debug aid)
    :return: None
    """
    # Directory holding the original images of this split.
    img_dir = root_dir + '/WIDER_' + img_set_type + '/images'
    # Ground-truth annotation file of this split.
    gt_filepath = root_dir + '/wider_face_split/wider_face_' + img_set_type + '_bbx_gt.txt'
    list_path = root_dir_voc + '/ImageSets/Main/' + img_set_type + '.txt'
    print(img_dir)
    print(gt_filepath)

    # cv2.imwrite fails silently when the target directory is missing, so make
    # sure the whole VOC directory layout exists up front.
    for sub_dir in ('ImageSets/Main', 'JPEGImages', 'Annotations'):
        os.makedirs(os.path.join(root_dir_voc, sub_dir), exist_ok=True)

    # Number of ground-truth entries parsed so far.
    index = 0
    no_face_index = []
    with open(gt_filepath, 'r') as gt_files, open(list_path, 'w') as fwrite:
        while max_images is None or index < max_images:
            filename = gt_files.readline().strip()
            # An empty line means end of file.
            if not filename:
                break

            img_path = img_dir + '/' + filename
            print('读取的图片绝对路径：', img_path)
            img = cv2.imread(img_path)
            # cv2.imread returns None (it does not raise) for unreadable files.
            if img is None:
                print('无法读取图片，停止转换：', img_path)
                break

            num_bbox = int(gt_files.readline())
            if num_bbox == 0:
                # An entry with zero faces is still followed by one line in
                # the ground-truth file; consume it to stay in sync.
                line = gt_files.readline()
                no_face_index.append(index)
                print('没有人脸框的特殊情况：', line)
            bboxes = _read_face_boxes(gt_files, num_bbox)

            if visualize:
                # Draw on a copy so the debug rectangles never end up in the
                # image saved to JPEGImages.
                preview = img.copy()
                for x, y, w, h in bboxes:
                    cv2.rectangle(preview, (x, y), (x + w, y + h),
                                  color=(0, 0, 255), thickness=1)
                cv2.imshow(str(index), preview)
                cv2.waitKey(0)

            # Flatten "<event dir>/<image>" into a single file name.
            filename = filename.replace('/', '_')
            print('保存后的filename：', filename)
            if not bboxes:
                print('no face box')
                index += 1
                continue

            # splitext strips only the extension, even if the name has dots.
            stem = os.path.splitext(filename)[0]
            cv2.imwrite('{}/JPEGImages/{}'.format(root_dir_voc, filename), img)
            fwrite.write(stem + '\n')
            xmlpath = '{}/Annotations/{}.xml'.format(root_dir_voc, stem)
            writexml(filename, img, bboxes, xmlpath)
            print('success number is %d' % index)
            index += 1

    # Summary after the loop.
    print('所有没有人脸的索引：', no_face_index)
if __name__ == '__main__':
    # Recorded result for the train split: 12879 images; indices of entries
    # without any face box: [279, 3808, 7512, 9227].
    # convert_imgset('train')
    # Recorded result for the val split: 3225 images; no entry without faces.
    convert_imgset('val')
    # Optional follow-up: rename the generated lists, train.txt -> trainval.txt
    # and val.txt -> test.txt.
    # shutil.move(root_dir_voc + '/ImageSets/Main/' + 'train.txt', root_dir_voc + '/ImageSets/Main/' + 'trainval.txt')
    # shutil.move(root_dir_voc + '/ImageSets/Main/' + 'val.txt', root_dir_voc + '/ImageSets/Main/' + 'test.txt')

复制代码