东方耀AI技术分享
Title: ADAS (Advanced Driver Assistance System) Hands-On Project Summary
Author: 东方耀
Date: 2019-9-4 20:36
Overview of the ADAS business scenario
An Advanced Driver Assistance System (ADAS) is an active-safety technology that uses the various sensors mounted on a vehicle to collect environmental data from inside and outside the car in real time, and performs processing such as identification, detection, and tracking of static and moving objects, so that the driver becomes aware of potential danger as early as possible, raising attention and improving safety.
Research directions within ADAS:
Navigation and real-time traffic information (TMC)
Adaptive light control
Electronic police systems
Pedestrian protection
Connected vehicles
Automatic parking
Adaptive cruise control (ACC)
Traffic sign recognition
Blind-spot detection
Lane departure warning
Driver fatigue detection
Lane keeping
Hill descent control
Collision avoidance / pre-crash systems
Electric vehicle warning sounds
Night vision
Main task: detect motor vehicles, non-motor vehicles, pedestrians, and traffic signs in vehicle-mounted video data.
This is a standard object detection problem.
Judging how well a detection algorithm performs:
1. Detection rate and false-alarm rate
Each ground-truth label is allowed to match at most one detection;
duplicate detections of the same object are counted as false detections.
2. AP and mAP
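The matching rule above (at most one detection per ground-truth label; duplicates count as false positives) can be sketched as follows. This is a simplified illustration, not the official VOC/KITTI evaluation code, and the function names are my own:

```python
import numpy as np

def iou(a, b):
    """Intersection-over-union of two [x1, y1, x2, y2] boxes."""
    ix = max(0.0, min(a[2], b[2]) - max(a[0], b[0]))
    iy = max(0.0, min(a[3], b[3]) - max(a[1], b[1]))
    inter = ix * iy
    area = lambda r: (r[2] - r[0]) * (r[3] - r[1])
    union = area(a) + area(b) - inter
    return inter / union if union > 0 else 0.0

def match_detections(gt_boxes, det_boxes, det_scores, iou_thresh=0.5):
    """Greedy matching: each ground-truth box may be claimed by at most one
    detection; any further detection of the same box is a false positive."""
    order = np.argsort(-np.asarray(det_scores))   # highest score first
    claimed = [False] * len(gt_boxes)
    tp = fp = 0
    for d in order:
        ious = [iou(det_boxes[d], g) for g in gt_boxes]
        best = int(np.argmax(ious)) if ious else -1
        if best >= 0 and ious[best] >= iou_thresh and not claimed[best]:
            claimed[best] = True
            tp += 1
        else:
            fp += 1                               # duplicate or bad match
    return tp, fp
```

With one ground-truth box and two overlapping detections, the higher-scoring one counts as a true positive and the duplicate as a false positive.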
Datasets for the ADAS scenario:
the KITTI dataset, the MOT benchmark, and Berkeley's large-scale autonomous-driving video dataset (BDD).
Difficulties in detecting motor vehicles, non-motor vehicles, and pedestrians (outdoors):
1. Detection on cloudy or rainy days and at night
2. Detection in crowded scenes
3. The non-rigid, articulated motion of pedestrians
4. Small-object detection
5. Occlusion, and so on
The KITTI dataset (shown with sample images in the original post)
Download link:
http://www.cvlibs.net/datasets/k ... hp?obj_benchmark=2d
The KITTI dataset was created jointly by the Karlsruhe Institute of Technology in Germany and the Toyota Technological Institute in the USA, and is currently one of the best-known computer-vision benchmark suites for autonomous-driving scenarios. It is used to evaluate the in-vehicle performance of computer-vision tasks such as stereo matching, optical flow, visual odometry, 3D object detection, and 3D tracking. KITTI contains real images captured in urban, rural, and highway scenes, with up to 15 vehicles and 30 pedestrians per image and varying degrees of occlusion and truncation. The whole dataset consists of 389 stereo and optical-flow image pairs, 39.2 km of visual-odometry sequences, and images containing more than 200k 3D-annotated objects, sampled and synchronized at 10 Hz.
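Each line of a KITTI label_2 file describes one object; the conversion script later in this post relies on field 0 (the class name) and fields 4-7 (the 2D box). A sketch of the field layout, using a representative label line:

```python
# Parse one line of a KITTI label_2 file. The field layout (type,
# truncation, occlusion, alpha, then the 2D box as left/top/right/bottom
# in pixels) is fixed by the KITTI object-detection devkit.
def parse_kitti_line(line):
    f = line.split()
    return {
        'type': f[0],                      # e.g. Car, Pedestrian, Cyclist, DontCare
        'truncated': float(f[1]),          # 0 (fully visible) .. 1 (fully truncated)
        'occluded': int(f[2]),             # 0=visible 1=partly 2=largely occluded 3=unknown
        'alpha': float(f[3]),              # observation angle
        'bbox': tuple(float(x) for x in f[4:8]),  # left, top, right, bottom
    }

line = 'Car 0.00 0 1.85 387.63 181.54 423.81 203.12 1.67 1.87 3.69 -16.53 2.39 58.49 1.57'
obj = parse_kitti_line(line)
```

The remaining fields (3D dimensions, location, rotation) are not needed for 2D detection and are ignored here.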
Listing all object-box categories that appear in the KITTI label files:

# -*- coding: utf-8 -*-
__author__ = u'东方耀 微信:dfy_88888'
__date__ = '2019/8/9 下午5:19'
__product__ = 'PyCharm'
__filename__ = 'demo_category'
import glob

list_anno_files = glob.glob('/home/dfy888/DataSets/Kitti_voc/training/label_2/*')
# 7481 annotation files
print len(list_anno_files)

category_list = []
for file_path in list_anno_files:
    with open(file_path) as f:
        anno_infos = f.readlines()
    for anno_item in anno_infos:
        # the first space-separated field of each line is the class name
        category_list.append(anno_item.split(' ')[0])

print 'All object-box categories found in the KITTI label files:'
# ['Cyclist', 'Van', 'Tram', 'Car', 'Misc', 'Pedestrian', 'Truck', 'Person_sitting', 'DontCare']
# Misc and DontCare will be dropped later when packing the data;
# they are not written into the VOC-format XML files.
print set(category_list)
print len(category_list)
Converting the KITTI dataset to VOC format:
Other annotation fields worth considering are truncation and occlusion: for example, heavily occluded boxes can be filtered out so that the sample quality is a bit better.
But a model trained that way will tend to miss heavily occluded objects at prediction time, so this is a trade-off you have to weigh yourself.
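The filtering idea above can be sketched with the truncation (field 1) and occlusion (field 2) columns of each KITTI label line. This filter is not part of the original conversion script, and the threshold values are hypothetical:

```python
# Decide whether to keep one KITTI annotation line before converting to VOC.
# max_truncation / max_occlusion are example thresholds, not values from the
# original project; tune them against the missed-detection trade-off.
def keep_annotation(line, max_truncation=0.5, max_occlusion=1):
    f = line.split()
    if f[0] in ('Misc', 'DontCare'):
        return False                      # always drop these classes
    # field 1: truncation in [0, 1]; field 2: occlusion level 0..3
    return float(f[1]) <= max_truncation and int(f[2]) <= max_occlusion
```

A fully visible car passes the filter, while a largely occluded or heavily truncated one is dropped.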
# -*- coding: utf-8 -*-
__author__ = u'东方耀 微信:dfy_88888'
__date__ = '2019/7/15 下午3:23'
__product__ = 'PyCharm'
__filename__ = 'kitti2voc'
import cv2
import glob
from xml.dom.minidom import Document

list_anno_files = glob.glob('/home/dfy888/DataSets/Kitti_voc/training/label_2/*')


def writexml(filename, saveimg, bboxes, xmlpath, typename):
    """
    Write a generic VOC-format XML annotation file.
    :param filename: image file name
    :param saveimg: image object (loaded with cv2)
    :param bboxes: list of object boxes
    :param xmlpath: path of the XML file to write
    :param typename: class name of each box
    :return:
    """
    doc = Document()
    # root node
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)
    folder = doc.createElement('folder')
    # note: folder name of the VOC-format dataset
    folder_name = doc.createTextNode('Kitti_voc')
    folder.appendChild(folder_name)
    annotation.appendChild(folder)
    filenamenode = doc.createElement('filename')
    filename_name = doc.createTextNode(filename)
    filenamenode.appendChild(filename_name)
    annotation.appendChild(filenamenode)
    source = doc.createElement('source')
    annotation.appendChild(source)
    database = doc.createElement('database')
    database.appendChild(doc.createTextNode('Kitti Database'))
    source.appendChild(database)
    annotation_s = doc.createElement('annotation')
    annotation_s.appendChild(doc.createTextNode('PASCAL VOC2007'))
    source.appendChild(annotation_s)
    image = doc.createElement('image')
    image.appendChild(doc.createTextNode('flickr'))
    source.appendChild(image)
    flickrid = doc.createElement('flickrid')
    flickrid.appendChild(doc.createTextNode('-1'))
    source.appendChild(flickrid)
    owner = doc.createElement('owner')
    annotation.appendChild(owner)
    flickrid_o = doc.createElement('flickrid')
    flickrid_o.appendChild(doc.createTextNode('dfy_88888'))
    owner.appendChild(flickrid_o)
    name_o = doc.createElement('name')
    name_o.appendChild(doc.createTextNode('dfy_88888'))
    owner.appendChild(name_o)
    size = doc.createElement('size')
    annotation.appendChild(size)
    width = doc.createElement('width')
    width.appendChild(doc.createTextNode(str(saveimg.shape[1])))
    height = doc.createElement('height')
    height.appendChild(doc.createTextNode(str(saveimg.shape[0])))
    depth = doc.createElement('depth')
    depth.appendChild(doc.createTextNode(str(saveimg.shape[2])))
    size.appendChild(width)
    size.appendChild(height)
    size.appendChild(depth)
    segmented = doc.createElement('segmented')
    segmented.appendChild(doc.createTextNode('0'))
    annotation.appendChild(segmented)
    for i in range(len(bboxes)):
        # each bbox is a 4-tuple: [xmin, ymin, xmax, ymax]
        bbox = bboxes[i]
        objects = doc.createElement('object')
        annotation.appendChild(objects)
        object_name = doc.createElement('name')
        # not only faces here: in the ADAS scenario the classes are
        # pedestrians, vehicles, traffic signs, etc.
        object_name.appendChild(doc.createTextNode(typename[i]))
        objects.appendChild(object_name)
        pose = doc.createElement('pose')
        pose.appendChild(doc.createTextNode('Unspecified'))
        objects.appendChild(pose)
        truncated = doc.createElement('truncated')
        truncated.appendChild(doc.createTextNode('1'))
        objects.appendChild(truncated)
        difficult = doc.createElement('difficult')
        difficult.appendChild(doc.createTextNode('0'))
        objects.appendChild(difficult)
        bndbox = doc.createElement('bndbox')
        objects.appendChild(bndbox)
        # xmin, ymin: top-left corner of the box
        xmin = doc.createElement('xmin')
        xmin.appendChild(doc.createTextNode(str(bbox[0])))
        bndbox.appendChild(xmin)
        ymin = doc.createElement('ymin')
        ymin.appendChild(doc.createTextNode(str(bbox[1])))
        bndbox.appendChild(ymin)
        # xmax, ymax: bottom-right corner of the box
        xmax = doc.createElement('xmax')
        xmax.appendChild(doc.createTextNode(str(bbox[2])))
        bndbox.appendChild(xmax)
        ymax = doc.createElement('ymax')
        ymax.appendChild(doc.createTextNode(str(bbox[3])))
        bndbox.appendChild(ymax)
    with open(xmlpath, 'w') as f:
        f.write(doc.toprettyxml(indent=''))


# convert the dataset (KITTI ---> VOC)
trainval = open('/home/dfy888/DataSets/Kitti_voc/ImageSets/Main/trainval.txt', 'w')
train = open('/home/dfy888/DataSets/Kitti_voc/ImageSets/Main/train.txt', 'w')
val = open('/home/dfy888/DataSets/Kitti_voc/ImageSets/Main/val.txt', 'w')
test = open('/home/dfy888/DataSets/Kitti_voc/ImageSets/Main/test.txt', 'w')
index = 0
# 7481 annotation files
for file_path in list_anno_files:
    with open(file_path) as f:
        # each annotation file is a plain txt file
        anno_infos = f.readlines()
    bboxes = []
    typename = []
    for anno_item in anno_infos:
        # parse one annotation line
        anno_item_infos = anno_item.split()
        if anno_item_infos[0] == 'Misc' or anno_item_infos[0] == 'DontCare':
            # drop Misc and DontCare to make training a bit easier
            continue
        else:
            typename.append(anno_item_infos[0])
            # fields 4-7 are the 2D box: left, top, right, bottom
            bbox = (int(float(anno_item_infos[4])), int(float(anno_item_infos[5])),
                    int(float(anno_item_infos[6])), int(float(anno_item_infos[7])))
            bboxes.append(bbox)
    filename = file_path.split('/')[-1].replace('txt', 'png')
    xmlpath = '/home/dfy888/DataSets/Kitti_voc/Annotations/' + filename.replace('png', 'xml')
    img_path = '/home/dfy888/DataSets/Kitti_voc/JPEGImages/' + filename
    saveimg = cv2.imread(img_path)
    writexml(filename, saveimg, bboxes, xmlpath, typename)
    # split: trainval 90% / test 10%; within trainval: train 70% / val 20%
    if index > len(list_anno_files) * 0.9:
        test.write(filename.replace('.png', '\n'))
    else:
        trainval.write(filename.replace('.png', '\n'))
        if index > len(list_anno_files) * 0.7:
            val.write(filename.replace('.png', '\n'))
        else:
            train.write(filename.replace('.png', '\n'))
    print 'processing: ' + str(index)
    index += 1
train.close()
trainval.close()
test.close()
val.close()
Setting up the environment for the Faster R-CNN detection model:
https://github.com/rbgirshick/py-faster-rcnn
git clone --recursive https://github.com/rbgirshick/py-faster-rcnn.git
The --recursive flag makes sure the bundled caffe-fast-rcnn submodule is downloaded as well. There is a pitfall here: the bundled Caffe version is quite old. For the fix, see:
http://www.ai111.vip/thread-788-1-1.html
Testing the Faster R-CNN detection model:
python tools/demo_detector_dfy.py
# -*- coding: utf-8 -*-
__author__ = u'东方耀 微信:dfy_88888'
__date__ = '2019/9/8 上午10:48'
__product__ = 'PyCharm'
__filename__ = 'demo_detector_dfy.py'
"""
Demo script showing detections in sample images.
Modified version: runs an object-detection demo with the pre-trained
faster_rcnn VGG16 model.
"""
import _init_paths
from fast_rcnn.config import cfg
from nms.gpu_nms import gpu_nms
from nms.cpu_nms import cpu_nms
import time
import matplotlib.pyplot as plt
import numpy as np
import caffe
import os
import cv2

# the 21 PASCAL VOC classes (including background)
CLASSES = ('__background__',
           'aeroplane', 'bicycle', 'bird', 'boat',
           'bottle', 'bus', 'car', 'cat', 'chair',
           'cow', 'diningtable', 'dog', 'horse',
           'motorbike', 'person', 'pottedplant',
           'sheep', 'sofa', 'train', 'tvmonitor')
class Timer(object):
    """A simple timer."""
    def __init__(self):
        self.total_time = 0.
        self.calls = 0
        self.start_time = 0.
        self.diff = 0.
        self.average_time = 0.

    def tic(self):
        # using time.time instead of time.clock because time.clock
        # does not normalize for multithreading
        self.start_time = time.time()

    def toc(self, average=True):
        self.diff = time.time() - self.start_time
        self.total_time += self.diff
        self.calls += 1
        self.average_time = self.total_time / self.calls
        if average:
            return self.average_time
        else:
            return self.diff


def nms(dets, thresh, force_cpu=False):
    """Dispatch to either CPU or GPU NMS implementations."""
    if dets.shape[0] == 0:
        return []
    if cfg.USE_GPU_NMS and not force_cpu:
        return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
    else:
        return cpu_nms(dets, thresh)
def bbox_transform_inv(boxes, deltas):
    if boxes.shape[0] == 0:
        return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)
    boxes = boxes.astype(deltas.dtype, copy=False)
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights
    dx = deltas[:, 0::4]
    dy = deltas[:, 1::4]
    dw = deltas[:, 2::4]
    dh = deltas[:, 3::4]
    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]
    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    # x1
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
    # y1
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
    # x2
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w
    # y2
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h
    return pred_boxes


def clip_boxes(boxes, im_shape):
    """
    Clip boxes to image boundaries.
    """
    # x1 >= 0
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
    # y1 >= 0
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
    # x2 < im_shape[1]
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
    # y2 < im_shape[0]
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
    return boxes
def im_list_to_blob(ims):
    """Convert a list of images into a network input.
    ims: a list of images (already resized with cv2.resize).
    Assumes images are already prepared (means subtracted, BGR order, ...).
    """
    # max_shape: the largest height/width among the input images
    max_shape = np.array([im.shape for im in ims]).max(axis=0)
    num_images = len(ims)
    # allocate an empty 4-D blob of shape (N, H, W, 3)
    blob = np.zeros((num_images, max_shape[0], max_shape[1], 3),
                    dtype=np.float32)
    for i in xrange(num_images):
        im = ims[i]
        # copy the actual image data into the empty 4-D blob
        blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
    # Move channels (axis 3) to axis 1
    # Axis order will become: (batch elem, channel, height, width)
    channel_swap = (0, 3, 1, 2)
    blob = blob.transpose(channel_swap)
    return blob
def vis_detections(im, class_name, dets, thresh=0.5):
    """Draw detected bounding boxes."""
    inds = np.where(dets[:, -1] >= thresh)[0]
    if len(inds) == 0:
        return
    im = im[:, :, (2, 1, 0)]
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')
    for i in inds:
        bbox = dets[i, :4]
        score = dets[i, -1]
        ax.add_patch(
            plt.Rectangle((bbox[0], bbox[1]),
                          bbox[2] - bbox[0],
                          bbox[3] - bbox[1], fill=False,
                          edgecolor='red', linewidth=3.5)
            )
        ax.text(bbox[0], bbox[1] - 2,
                '{:s} {:.3f}'.format(class_name, score),
                bbox=dict(facecolor='blue', alpha=0.5),
                fontsize=14, color='white')
    ax.set_title(('{} detections with '
                  'p({} | box) >= {:.1f}').format(class_name, class_name,
                                                  thresh),
                 fontsize=14)
    plt.axis('off')
    plt.tight_layout()
    plt.draw()
def _get_image_blob(im):
    """Converts an image into a network input.
    Arguments:
        im (ndarray): a color image in BGR order
    Returns:
        blob (ndarray): a data blob holding an image pyramid
        im_scale_factors (list): list of image scales (relative to im) used
            in the image pyramid
    """
    im_orig = im.astype(np.float32, copy=True)
    # subtract the pixel mean
    im_orig -= cfg.PIXEL_MEANS
    # shape of the original image
    im_shape = im_orig.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    processed_ims = []
    im_scale_factors = []
    # cfg.TEST.SCALES = (600,)
    for target_size in cfg.TEST.SCALES:
        im_scale = float(target_size) / float(im_size_min)
        # Prevent the biggest axis from being more than MAX_SIZE
        # cfg.TEST.MAX_SIZE = 1000
        if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
            # make sure the larger side of the resized image does not
            # exceed the configured maximum size
            im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
        # at this point im_scale satisfies both constraints
        im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                        interpolation=cv2.INTER_LINEAR)
        im_scale_factors.append(im_scale)
        processed_ims.append(im)
    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, np.array(im_scale_factors)


def _project_im_rois(im_rois, scales):
    """Project image RoIs into the image pyramid built by _get_image_blob.
    Arguments:
        im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates
        scales (list): scale factors as returned by _get_image_blob
    Returns:
        rois (ndarray): R x 4 matrix of projected RoI coordinates
        levels (list): image pyramid levels used by each projected RoI
    """
    im_rois = im_rois.astype(np.float, copy=False)
    if len(scales) > 1:
        widths = im_rois[:, 2] - im_rois[:, 0] + 1
        heights = im_rois[:, 3] - im_rois[:, 1] + 1
        areas = widths * heights
        scaled_areas = areas[:, np.newaxis] * (scales[np.newaxis, :] ** 2)
        diff_areas = np.abs(scaled_areas - 224 * 224)
        levels = diff_areas.argmin(axis=1)[:, np.newaxis]
    else:
        levels = np.zeros((im_rois.shape[0], 1), dtype=np.int)
    rois = im_rois * scales[levels]
    return rois, levels
def _get_rois_blob(im_rois, im_scale_factors):
    """Converts RoIs into network inputs.
    Arguments:
        im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates
        im_scale_factors (list): scale factors as returned by _get_image_blob
    Returns:
        blob (ndarray): R x 5 matrix of RoIs in the image pyramid
    """
    rois, levels = _project_im_rois(im_rois, im_scale_factors)
    rois_blob = np.hstack((levels, rois))
    return rois_blob.astype(np.float32, copy=False)


def _get_blobs(im, rois):
    """Convert an image and RoIs within that image into network inputs."""
    blobs = {'data': None, 'rois': None}
    blobs['data'], im_scale_factors = _get_image_blob(im)
    if not cfg.TEST.HAS_RPN:
        print >> dfy_log_file_writer, 'no RPN network: rois is not None'
        blobs['rois'] = _get_rois_blob(rois, im_scale_factors)
    print >> dfy_log_file_writer, 'blobs:', blobs.keys(), blobs['data'].shape, im_scale_factors
    return blobs, im_scale_factors
def im_detect(net, im, boxes=None):
    """Detect object classes in an image given object proposals.
    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)
    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    # resize the original image, width and height scaled by the same factor
    blobs, im_scales = _get_blobs(im, boxes)
    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)
        print >> dfy_log_file_writer, 'with RPN network:', blobs.keys(), blobs['im_info']
    # reshape network inputs
    print >> dfy_log_file_writer, 'reshape:', blobs['data'].shape
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        print >> dfy_log_file_writer, 'reshape:', blobs['im_info'].shape
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))
    # do the forward pass
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32, copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)
    # https://blog.csdn.net/tina_ttl/article/details/51033660
    # (how to visualize the output of each CNN layer in Caffe)
    for layer_name, blob in net.blobs.iteritems():
        print >> dfy_log_file_writer, 'layer name + shape: ' + layer_name + '\t' + str(blob.data.shape)
    for layer_name, param in net.params.iteritems():
        print >> dfy_log_file_writer, 'layer name + W and b shapes: ' + layer_name + '\t' + str(param[0].data.shape), str(param[1].data.shape)
    print >> dfy_log_file_writer, 'forward-pass outputs blobs_out:\n', blobs_out.keys(), \
        blobs_out['bbox_pred'].shape, \
        blobs_out['cls_prob'].shape
    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        print >> dfy_log_file_writer, '\nrois:', rois, rois.shape
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]
        print >> dfy_log_file_writer, '\nboxes:', boxes, boxes.shape
    # use softmax estimated probabilities
    scores = blobs_out['cls_prob']
    if cfg.TEST.BBOX_REG:
        # True by default: apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    return scores, pred_boxes
def demo(net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name)
    print >> dfy_log_file_writer, 'detecting image:', im_file
    im = cv2.imread(im_file)
    print >> dfy_log_file_writer, 'original image size (H, W, C):', im.shape
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    # ('scores.shape:', (300, 21))
    print('scores.shape:', scores.shape)
    # ('boxes.shape:', (300, 84))
    print('boxes.shape:', boxes.shape)
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])
    # Visualize detections for each class
    # confidence threshold
    CONF_THRESH = 0.95
    # NMS threshold
    NMS_THRESH = 0.6
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        # apply the NMS threshold first
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        # the confidence threshold is only applied when visualizing
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
if __name__ == '__main__':
    cfg.TEST.HAS_RPN = True  # Use RPN for proposals
    # matching deploy prototxt and caffemodel files
    prototxt = 'models/pascal_voc/VGG16/faster_rcnn_alt_opt/faster_rcnn_test.pt'
    caffemodel = 'data/faster_rcnn_models/VGG16_faster_rcnn_final.caffemodel'
    caffe.set_device(0)
    caffe.set_mode_gpu()
    # log file for the printed results
    # https://blog.csdn.net/jiongnima/article/details/80016683
    # (line-by-line walk-through of the Faster R-CNN ROI-Pooling source)
    dfy_log_file = "tools/demo_detector_dfy.log"
    dfy_log_file_writer = open(dfy_log_file, 'w')
    net = caffe.Net(prototxt, caffemodel, caffe.TEST)
    # im_names = ['001763.jpg', '004545.jpg']
    im_names = ['test01.png']
    for im_name in im_names:
        print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
        print 'Demo for data/demo/{}'.format(im_name)
        demo(net, im_name)
    plt.show()
    dfy_log_file_writer.close()
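The nms() wrapper in the script dispatches to compiled CPU/GPU kernels. For reference, the same greedy suppression in pure NumPy (equivalent to the py_cpu_nms fallback shipped with py-faster-rcnn) looks like this:

```python
import numpy as np

def py_nms(dets, thresh):
    """Pure-NumPy non-maximum suppression.
    dets: (N, 5) array of [x1, y1, x2, y2, score]; returns kept row indices."""
    x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]          # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(int(i))
        # IoU of the kept box with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # drop boxes that overlap the kept box above the threshold
        order = order[1:][iou <= thresh]
    return keep
```

Note the "+ 1" terms: py-faster-rcnn treats box coordinates as inclusive pixel indices, which is why the same convention appears in bbox_transform_inv above.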
Faster R-CNN network structure explained in detail:
http://www.ai111.vip/thread-800-1-1.html
Training on the KITTI dataset with the Faster R-CNN algorithm:
1. Modify the train and test network definition files
They were previously used on the PASCAL VOC dataset (21 classes in total); the KITTI dataset has 8 classes.
models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_end2end/train.prototxt
Change 1: in the layer with name: 'input-data', type: 'Python', set num_classes to 8
Change 2: in the layer with name: 'roi-data', type: 'Python', set num_classes to 8
Change 3: in the layer with name: "cls_score", type: "InnerProduct", set num_output: 8
Change 4: in the layer with name: "bbox_pred", type: "InnerProduct", set num_output: 32 (= 8 * 4)
models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_end2end/test.prototxt
Change 1: in the layer with name: "cls_score", type: "InnerProduct", set num_output: 8
Change 2: in the layer with name: "bbox_pred", type: "InnerProduct", set num_output: 32 (= 8 * 4)
2. Modify the class list in lib/datasets/pascal_voc.py
# changed from 21 classes to 8; note that all names must be lowercase
self._classes = ('__background__',  # always index 0
                 'person_sitting', 'truck', 'van', 'pedestrian',
                 'cyclist', 'tram', 'car')
3. Modify the data paths in lib/datasets/pascal_voc.py
self._data_path = '/home/dfy888/DataSets/Kitti_voc'
self._image_ext = '.png'
Also comment out the following code:
# assert os.path.exists(self._devkit_path), \
#     'VOCdevkit path does not exist: {}'.format(self._devkit_path)
Search for _devkit_path and replace it with the corresponding _data_path.
Also modify the _load_pascal_annotation function (x1 and y1 must not have 1 subtracted):
# Make pixel indexes 0-based
x1 = float(bbox.find('xmin').text)
y1 = float(bbox.find('ymin').text)
x2 = float(bbox.find('xmax').text) - 1
y2 = float(bbox.find('ymax').text) - 1
4. Remove the pre-trained model
In tools/train_net.py change:
train_net(args.solver, roidb, output_dir,
          pretrained_model=None,
          max_iters=args.max_iters)
5. Bugs encountered and their fixes:
http://www.ai111.vip/thread-790-1-1.html
http://www.ai111.vip/thread-789-1-1.html
http://www.ai111.vip/thread-788-1-1.html
http://www.ai111.vip/thread-791-1-1.html
http://www.ai111.vip/thread-792-1-1.html
6. Start training:
python tools/train_net.py --gpu 0
7. Delete data/cache before retraining
8. Start testing:
python tools/test_net.py --gpu 0
Ideas for model optimization (in a real engineering project you would not modify the overall framework):
1. Increase the number of training iterations, e.g. to 500k, to make sure the network converges
2. Change the CNN backbone in train.prototxt (important)
3. Modify the Python layers defined in train.prototxt
   (usually found under lib/, e.g. roi_data_layer, nms, etc.)
4. Improve the input data (important)
5. Fine-tune the network hyperparameters in lib/fast_rcnn/config.py (many thresholds, etc.)
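For point 5, py-faster-rcnn can override the defaults in lib/fast_rcnn/config.py with a YAML experiment file passed via the --cfg option of train_net.py, instead of editing the source. A hypothetical experiment file (the keys follow config.py; the values here are illustrative, not tuned):

```yaml
# Example --cfg file overriding a few thresholds from lib/fast_rcnn/config.py
EXP_DIR: kitti_end2end
TRAIN:
  SCALES: [600]
  RPN_POSITIVE_OVERLAP: 0.7   # anchor/GT IoU needed to count as positive
  RPN_NMS_THRESH: 0.7
  BATCH_SIZE: 128
TEST:
  SCALES: [600]
  MAX_SIZE: 1000
  NMS: 0.3
```

Keeping experiment settings in a file like this also makes different training runs reproducible and easy to compare.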
Author: xsoft
Date: 2020-2-3 15:49
Thank you for the materials, teacher.
东方耀AI技术分享 (http://www.ai111.vip/)