东方耀AI技术分享
标题:
faster rcnn网络结构详解
[打印本页]
作者:
东方耀
时间:
2019-9-8 09:32
标题:
faster rcnn网络结构详解
faster rcnn网络结构详解
caffe网络在线可视化工具(需要翻墙):
http://ethereon.github.io/netscope/#/editor
name: "VGG_ILSVRC_16_layers"
input: "data"
input_shape {
dim: 1
dim: 3
dim: 600
dim: 989
}
input: "im_info"
input_shape {
dim: 1
dim: 3
}
layer {
name: "conv1_1"
type: "Convolution"
bottom: "data"
top: "conv1_1"
convolution_param {
num_output: 64
pad: 1 kernel_size: 3
}
}
layer {
name: "relu1_1"
type: "ReLU"
bottom: "conv1_1"
top: "conv1_1"
}
layer {
name: "conv1_2"
type: "Convolution"
bottom: "conv1_1"
top: "conv1_2"
convolution_param {
num_output: 64
pad: 1 kernel_size: 3
}
}
layer {
name: "relu1_2"
type: "ReLU"
bottom: "conv1_2"
top: "conv1_2"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1_2"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2 stride: 2
}
}
layer {
name: "conv2_1"
type: "Convolution"
bottom: "pool1"
top: "conv2_1"
convolution_param {
num_output: 128
pad: 1 kernel_size: 3
}
}
layer {
name: "relu2_1"
type: "ReLU"
bottom: "conv2_1"
top: "conv2_1"
}
layer {
name: "conv2_2"
type: "Convolution"
bottom: "conv2_1"
top: "conv2_2"
convolution_param {
num_output: 128
pad: 1 kernel_size: 3
}
}
layer {
name: "relu2_2"
type: "ReLU"
bottom: "conv2_2"
top: "conv2_2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2_2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2 stride: 2
}
}
layer {
name: "conv3_1"
type: "Convolution"
bottom: "pool2"
top: "conv3_1"
convolution_param {
num_output: 256
pad: 1 kernel_size: 3
}
}
layer {
name: "relu3_1"
type: "ReLU"
bottom: "conv3_1"
top: "conv3_1"
}
layer {
name: "conv3_2"
type: "Convolution"
bottom: "conv3_1"
top: "conv3_2"
convolution_param {
num_output: 256
pad: 1 kernel_size: 3
}
}
layer {
name: "relu3_2"
type: "ReLU"
bottom: "conv3_2"
top: "conv3_2"
}
layer {
name: "conv3_3"
type: "Convolution"
bottom: "conv3_2"
top: "conv3_3"
convolution_param {
num_output: 256
pad: 1 kernel_size: 3
}
}
layer {
name: "relu3_3"
type: "ReLU"
bottom: "conv3_3"
top: "conv3_3"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3_3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2 stride: 2
}
}
layer {
name: "conv4_1"
type: "Convolution"
bottom: "pool3"
top: "conv4_1"
convolution_param {
num_output: 512
pad: 1 kernel_size: 3
}
}
layer {
name: "relu4_1"
type: "ReLU"
bottom: "conv4_1"
top: "conv4_1"
}
layer {
name: "conv4_2"
type: "Convolution"
bottom: "conv4_1"
top: "conv4_2"
convolution_param {
num_output: 512
pad: 1 kernel_size: 3
}
}
layer {
name: "relu4_2"
type: "ReLU"
bottom: "conv4_2"
top: "conv4_2"
}
layer {
name: "conv4_3"
type: "Convolution"
bottom: "conv4_2"
top: "conv4_3"
convolution_param {
num_output: 512
pad: 1 kernel_size: 3
}
}
layer {
name: "relu4_3"
type: "ReLU"
bottom: "conv4_3"
top: "conv4_3"
}
layer {
name: "pool4"
type: "Pooling"
bottom: "conv4_3"
top: "pool4"
pooling_param {
pool: MAX
kernel_size: 2 stride: 2
}
}
layer {
name: "conv5_1"
type: "Convolution"
bottom: "pool4"
top: "conv5_1"
convolution_param {
num_output: 512
pad: 1 kernel_size: 3
}
}
layer {
name: "relu5_1"
type: "ReLU"
bottom: "conv5_1"
top: "conv5_1"
}
layer {
name: "conv5_2"
type: "Convolution"
bottom: "conv5_1"
top: "conv5_2"
convolution_param {
num_output: 512
pad: 1 kernel_size: 3
}
}
layer {
name: "relu5_2"
type: "ReLU"
bottom: "conv5_2"
top: "conv5_2"
}
layer {
name: "conv5_3"
type: "Convolution"
bottom: "conv5_2"
top: "conv5_3"
convolution_param {
num_output: 512
pad: 1 kernel_size: 3
}
}
layer {
name: "relu5_3"
type: "ReLU"
bottom: "conv5_3"
top: "conv5_3"
}
#========= RPN ============
layer {
name: "rpn_conv/3x3"
type: "Convolution"
bottom: "conv5_3"
top: "rpn/output"
convolution_param {
num_output: 512
kernel_size: 3 pad: 1 stride: 1
}
}
layer {
name: "rpn_relu/3x3"
type: "ReLU"
bottom: "rpn/output"
top: "rpn/output"
}
layer {
name: "rpn_cls_score"
type: "Convolution"
bottom: "rpn/output"
top: "rpn_cls_score"
convolution_param {
num_output: 18 # 2(bg/fg) * 9(anchors)
kernel_size: 1 pad: 0 stride: 1
}
}
layer {
name: "rpn_bbox_pred"
type: "Convolution"
bottom: "rpn/output"
top: "rpn_bbox_pred"
convolution_param {
num_output: 36 # 4 * 9(anchors)
kernel_size: 1 pad: 0 stride: 1
}
}
layer {
bottom: "rpn_cls_score"
top: "rpn_cls_score_reshape"
name: "rpn_cls_score_reshape"
type: "Reshape"
reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } }
}
#========= RoI Proposal ============
layer {
name: "rpn_cls_prob"
type: "Softmax"
bottom: "rpn_cls_score_reshape"
top: "rpn_cls_prob"
}
layer {
name: 'rpn_cls_prob_reshape'
type: 'Reshape'
bottom: 'rpn_cls_prob'
top: 'rpn_cls_prob_reshape'
reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } }
}
layer {
name: 'proposal'
type: 'Python'
bottom: 'rpn_cls_prob_reshape'
bottom: 'rpn_bbox_pred'
bottom: 'im_info'
top: 'rois'
python_param {
module: 'rpn.proposal_layer'
layer: 'ProposalLayer'
param_str: "'feat_stride': 16"
}
}
#========= RCNN ============
layer {
name: "roi_pool5"
type: "ROIPooling"
bottom: "conv5_3"
bottom: "rois"
top: "pool5"
roi_pooling_param {
pooled_w: 7
pooled_h: 7
spatial_scale: 0.0625 # 1/16
}
}
layer {
name: "fc6"
type: "InnerProduct"
bottom: "pool5"
top: "fc6"
inner_product_param {
num_output: 4096
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "fc6"
top: "fc6"
}
layer {
name: "fc7"
type: "InnerProduct"
bottom: "fc6"
top: "fc7"
inner_product_param {
num_output: 4096
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "fc7"
top: "fc7"
}
layer {
name: "cls_score"
type: "InnerProduct"
bottom: "fc7"
top: "cls_score"
inner_product_param {
num_output: 21
}
}
layer {
name: "bbox_pred"
type: "InnerProduct"
bottom: "fc7"
top: "bbox_pred"
inner_product_param {
num_output: 84
}
}
layer {
name: "cls_prob"
type: "Softmax"
bottom: "cls_score"
top: "cls_prob"
}
复制代码
层名+shape:data (1, 3, 600, 989)
层名+shape:im_info (1, 3)
层名+shape:conv1_1 (1, 64, 600, 989)
层名+shape:conv1_2 (1, 64, 600, 989)
层名+shape:pool1 (1, 64, 300, 495)
层名+shape:conv2_1 (1, 128, 300, 495)
层名+shape:conv2_2 (1, 128, 300, 495)
层名+shape:pool2 (1, 128, 150, 248)
层名+shape:conv3_1 (1, 256, 150, 248)
层名+shape:conv3_2 (1, 256, 150, 248)
层名+shape:conv3_3 (1, 256, 150, 248)
层名+shape:pool3 (1, 256, 75, 124)
层名+shape:conv4_1 (1, 512, 75, 124)
层名+shape:conv4_2 (1, 512, 75, 124)
层名+shape:conv4_3 (1, 512, 75, 124)
层名+shape:pool4 (1, 512, 38, 62)
层名+shape:conv5_1 (1, 512, 38, 62)
层名+shape:conv5_2 (1, 512, 38, 62)
层名+shape:conv5_3 (1, 512, 38, 62)
层名+shape:conv5_3_relu5_3_0_split_0 (1, 512, 38, 62)
层名+shape:conv5_3_relu5_3_0_split_1 (1, 512, 38, 62)
层名+shape:rpn/output (1, 512, 38, 62)
层名+shape:rpn/output_rpn_relu/3x3_0_split_0 (1, 512, 38, 62)
层名+shape:rpn/output_rpn_relu/3x3_0_split_1 (1, 512, 38, 62)
层名+shape:rpn_cls_score (1, 18, 38, 62)
层名+shape:rpn_bbox_pred (1, 36, 38, 62)
层名+shape:rpn_cls_score_reshape (1, 2, 342, 62)
层名+shape:rpn_cls_prob (1, 2, 342, 62)
层名+shape:rpn_cls_prob_reshape (1, 18, 38, 62)
层名+shape:rois (300, 5)
层名+shape:pool5 (300, 512, 7, 7)
层名+shape:fc6 (300, 4096)
层名+shape:fc7 (300, 4096)
层名+shape:fc7_relu7_0_split_0 (300, 4096)
层名+shape:fc7_relu7_0_split_1 (300, 4096)
层名+shape:cls_score (300, 21)
层名+shape:bbox_pred (300, 84)
层名+shape:cls_prob (300, 21)
层名+网络W与b:conv1_1 (64, 3, 3, 3) (64,)
层名+网络W与b:conv1_2 (64, 64, 3, 3) (64,)
层名+网络W与b:conv2_1 (128, 64, 3, 3) (128,)
层名+网络W与b:conv2_2 (128, 128, 3, 3) (128,)
层名+网络W与b:conv3_1 (256, 128, 3, 3) (256,)
层名+网络W与b:conv3_2 (256, 256, 3, 3) (256,)
层名+网络W与b:conv3_3 (256, 256, 3, 3) (256,)
层名+网络W与b:conv4_1 (512, 256, 3, 3) (512,)
层名+网络W与b:conv4_2 (512, 512, 3, 3) (512,)
层名+网络W与b:conv4_3 (512, 512, 3, 3) (512,)
层名+网络W与b:conv5_1 (512, 512, 3, 3) (512,)
层名+网络W与b:conv5_2 (512, 512, 3, 3) (512,)
层名+网络W与b:conv5_3 (512, 512, 3, 3) (512,)
层名+网络W与b:rpn_conv/3x3 (512, 512, 3, 3) (512,)
层名+网络W与b:rpn_cls_score (18, 512, 1, 1) (18,)
层名+网络W与b:rpn_bbox_pred (36, 512, 1, 1) (36,)
层名+网络W与b:fc6 (4096, 25088) (4096,)
层名+网络W与b:fc7 (4096, 4096) (4096,)
层名+网络W与b:cls_score (21, 4096) (21,)
层名+网络W与b:bbox_pred (84, 4096) (84,)
前向计算结果blobs_out:
['bbox_pred', 'cls_prob'] (300, 84) (300, 21)
关键层的分析:
layer {
name: 'proposal'
type: 'Python'
bottom: 'rpn_cls_prob_reshape'
bottom: 'rpn_bbox_pred'
bottom: 'im_info'
top: 'rois'
python_param {
module: 'rpn.proposal_layer'
layer: 'ProposalLayer'
param_str: "'feat_stride': 16"
}
}
输入是:
rpn_cls_prob_reshape (1, 18, 38, 62)
rpn_bbox_pred (1, 36, 38, 62)
im_info (1, 3)
输出是:
rois (300, 5)
class ProposalLayer(caffe.Layer):里面会生成 9 种 anchor box
generate_anchors(base_size=16, ratios=[0.5, 1, 2],
scales=np.array([8, 16, 32]))
ratios就是高宽比H/W(或 长宽比) :
# ws:宽度。anchor 不一定是正方形,所以先用面积除以高宽比(size_ratios = 面积 / ratios = w²),再开方得到宽度
ws = np.round(np.sqrt(size_ratios))
# 高度 = 宽度 × 高宽比(ratios 即 H/W);np.round 取最近的整数(恰为 .5 时取最近的偶数,并非严格的四舍五入)
hs = np.round(ws * ratios)
scales是缩放比例:
ws = w * scales
hs = h * scales
layer {
name: "roi_pool5"
type: "ROIPooling"
bottom: "conv5_3"
bottom: "rois"
top: "pool5"
roi_pooling_param {
pooled_w: 7
pooled_h: 7
spatial_scale: 0.0625 # 1/16
}
}
输入:
conv5_3 (1, 512, 38, 62)
rois (300, 5)
输出:
pool5 (300, 512, 7, 7)
ROI pooling总结:
(1)用于目标检测任务;
(2)允许我们对CNN中的feature map进行reuse;
(3)可以显著加速training和testing速度;
(4)允许end-to-end的形式训练目标检测系统。
作者:
xsoft
时间:
2020-2-3 15:49
谢谢老师提供的资料。
欢迎光临 东方耀AI技术分享 (http://www.ai111.vip/)
Powered by Discuz! X3.4