[Lecture notes] Faster R-CNN network structure explained

Posted 2019-9-8 09:32:43




Netscope, an online visualization tool for Caffe networks (may require a VPN to access): http://ethereon.github.io/netscope/#/editor


name: "VGG_ILSVRC_16_layers"

input: "data"
input_shape {
  dim: 1
  dim: 3
  dim: 600
  dim: 989
}

input: "im_info"
input_shape {
  dim: 1
  dim: 3
}

layer {
  name: "conv1_1"
  type: "Convolution"
  bottom: "data"
  top: "conv1_1"
  convolution_param {
    num_output: 64
    pad: 1 kernel_size: 3
  }
}
layer {
  name: "relu1_1"
  type: "ReLU"
  bottom: "conv1_1"
  top: "conv1_1"
}
layer {
  name: "conv1_2"
  type: "Convolution"
  bottom: "conv1_1"
  top: "conv1_2"
  convolution_param {
    num_output: 64
    pad: 1 kernel_size: 3
  }
}
layer {
  name: "relu1_2"
  type: "ReLU"
  bottom: "conv1_2"
  top: "conv1_2"
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1_2"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2 stride: 2
  }
}
layer {
  name: "conv2_1"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2_1"
  convolution_param {
    num_output: 128
    pad: 1 kernel_size: 3
  }
}
layer {
  name: "relu2_1"
  type: "ReLU"
  bottom: "conv2_1"
  top: "conv2_1"
}
layer {
  name: "conv2_2"
  type: "Convolution"
  bottom: "conv2_1"
  top: "conv2_2"
  convolution_param {
    num_output: 128
    pad: 1 kernel_size: 3
  }
}
layer {
  name: "relu2_2"
  type: "ReLU"
  bottom: "conv2_2"
  top: "conv2_2"
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2_2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2 stride: 2
  }
}
layer {
  name: "conv3_1"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3_1"
  convolution_param {
    num_output: 256
    pad: 1 kernel_size: 3
  }
}
layer {
  name: "relu3_1"
  type: "ReLU"
  bottom: "conv3_1"
  top: "conv3_1"
}
layer {
  name: "conv3_2"
  type: "Convolution"
  bottom: "conv3_1"
  top: "conv3_2"
  convolution_param {
    num_output: 256
    pad: 1 kernel_size: 3
  }
}
layer {
  name: "relu3_2"
  type: "ReLU"
  bottom: "conv3_2"
  top: "conv3_2"
}
layer {
  name: "conv3_3"
  type: "Convolution"
  bottom: "conv3_2"
  top: "conv3_3"
  convolution_param {
    num_output: 256
    pad: 1 kernel_size: 3
  }
}
layer {
  name: "relu3_3"
  type: "ReLU"
  bottom: "conv3_3"
  top: "conv3_3"
}
layer {
  name: "pool3"
  type: "Pooling"
  bottom: "conv3_3"
  top: "pool3"
  pooling_param {
    pool: MAX
    kernel_size: 2 stride: 2
  }
}
layer {
  name: "conv4_1"
  type: "Convolution"
  bottom: "pool3"
  top: "conv4_1"
  convolution_param {
    num_output: 512
    pad: 1 kernel_size: 3
  }
}
layer {
  name: "relu4_1"
  type: "ReLU"
  bottom: "conv4_1"
  top: "conv4_1"
}
layer {
  name: "conv4_2"
  type: "Convolution"
  bottom: "conv4_1"
  top: "conv4_2"
  convolution_param {
    num_output: 512
    pad: 1 kernel_size: 3
  }
}
layer {
  name: "relu4_2"
  type: "ReLU"
  bottom: "conv4_2"
  top: "conv4_2"
}
layer {
  name: "conv4_3"
  type: "Convolution"
  bottom: "conv4_2"
  top: "conv4_3"
  convolution_param {
    num_output: 512
    pad: 1 kernel_size: 3
  }
}
layer {
  name: "relu4_3"
  type: "ReLU"
  bottom: "conv4_3"
  top: "conv4_3"
}
layer {
  name: "pool4"
  type: "Pooling"
  bottom: "conv4_3"
  top: "pool4"
  pooling_param {
    pool: MAX
    kernel_size: 2 stride: 2
  }
}
layer {
  name: "conv5_1"
  type: "Convolution"
  bottom: "pool4"
  top: "conv5_1"
  convolution_param {
    num_output: 512
    pad: 1 kernel_size: 3
  }
}
layer {
  name: "relu5_1"
  type: "ReLU"
  bottom: "conv5_1"
  top: "conv5_1"
}
layer {
  name: "conv5_2"
  type: "Convolution"
  bottom: "conv5_1"
  top: "conv5_2"
  convolution_param {
    num_output: 512
    pad: 1 kernel_size: 3
  }
}
layer {
  name: "relu5_2"
  type: "ReLU"
  bottom: "conv5_2"
  top: "conv5_2"
}
layer {
  name: "conv5_3"
  type: "Convolution"
  bottom: "conv5_2"
  top: "conv5_3"
  convolution_param {
    num_output: 512
    pad: 1 kernel_size: 3
  }
}
layer {
  name: "relu5_3"
  type: "ReLU"
  bottom: "conv5_3"
  top: "conv5_3"
}

#========= RPN ============

layer {
  name: "rpn_conv/3x3"
  type: "Convolution"
  bottom: "conv5_3"
  top: "rpn/output"
  convolution_param {
    num_output: 512
    kernel_size: 3 pad: 1 stride: 1
  }
}
layer {
  name: "rpn_relu/3x3"
  type: "ReLU"
  bottom: "rpn/output"
  top: "rpn/output"
}

layer {
  name: "rpn_cls_score"
  type: "Convolution"
  bottom: "rpn/output"
  top: "rpn_cls_score"
  convolution_param {
    num_output: 18   # 2(bg/fg) * 9(anchors)
    kernel_size: 1 pad: 0 stride: 1
  }
}
layer {
  name: "rpn_bbox_pred"
  type: "Convolution"
  bottom: "rpn/output"
  top: "rpn_bbox_pred"
  convolution_param {
    num_output: 36   # 4 * 9(anchors)
    kernel_size: 1 pad: 0 stride: 1
  }
}
layer {
   bottom: "rpn_cls_score"
   top: "rpn_cls_score_reshape"
   name: "rpn_cls_score_reshape"
   type: "Reshape"
   reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } }
}

#========= RoI Proposal ============

layer {
  name: "rpn_cls_prob"
  type: "Softmax"
  bottom: "rpn_cls_score_reshape"
  top: "rpn_cls_prob"
}
layer {
  name: 'rpn_cls_prob_reshape'
  type: 'Reshape'
  bottom: 'rpn_cls_prob'
  top: 'rpn_cls_prob_reshape'
  reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } }
}
layer {
  name: 'proposal'
  type: 'Python'
  bottom: 'rpn_cls_prob_reshape'
  bottom: 'rpn_bbox_pred'
  bottom: 'im_info'
  top: 'rois'
  python_param {
    module: 'rpn.proposal_layer'
    layer: 'ProposalLayer'
    param_str: "'feat_stride': 16"
  }
}

#========= RCNN ============

layer {
  name: "roi_pool5"
  type: "ROIPooling"
  bottom: "conv5_3"
  bottom: "rois"
  top: "pool5"
  roi_pooling_param {
    pooled_w: 7
    pooled_h: 7
    spatial_scale: 0.0625 # 1/16
  }
}
layer {
  name: "fc6"
  type: "InnerProduct"
  bottom: "pool5"
  top: "fc6"
  inner_product_param {
    num_output: 4096
  }
}
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "fc6"
  top: "fc6"
}
layer {
  name: "fc7"
  type: "InnerProduct"
  bottom: "fc6"
  top: "fc7"
  inner_product_param {
    num_output: 4096
  }
}
layer {
  name: "relu7"
  type: "ReLU"
  bottom: "fc7"
  top: "fc7"
}
layer {
  name: "cls_score"
  type: "InnerProduct"
  bottom: "fc7"
  top: "cls_score"
  inner_product_param {
    num_output: 21
  }
}
layer {
  name: "bbox_pred"
  type: "InnerProduct"
  bottom: "fc7"
  top: "bbox_pred"
  inner_product_param {
    num_output: 84
  }
}
layer {
  name: "cls_prob"
  type: "Softmax"
  bottom: "cls_score"
  top: "cls_prob"
}
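The rpn_cls_score_reshape layer above is worth unpacking: in Caffe's reshape_param, dim: 0 copies the corresponding input dimension and dim: -1 is inferred, so (1, 18, 38, 62) becomes (1, 2, 342, 62) and the Softmax runs over the bg/fg axis for all 9 anchors at once. A minimal NumPy sketch of the round trip (assuming the channel layout used by py-faster-rcnn, with the 9 background scores before the 9 foreground scores):

```python
import numpy as np

# rpn_cls_score: (N, 2*A, H, W) with A = 9 anchors per location
N, A, H, W = 1, 9, 38, 62
rpn_cls_score = np.random.rand(N, 2 * A, H, W).astype(np.float32)

# reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } }
# dim: 0 keeps the input dim, dim: -1 is inferred -> (N, 2, A*H, W)
score_reshape = rpn_cls_score.reshape(N, 2, -1, W)
assert score_reshape.shape == (1, 2, 342, 62)

# softmax over the bg/fg axis (axis 1), per anchor and location
e = np.exp(score_reshape - score_reshape.max(axis=1, keepdims=True))
prob = e / e.sum(axis=1, keepdims=True)

# the second Reshape restores (N, 2*A, H, W) for the proposal layer
prob_reshape = prob.reshape(N, 2 * A, H, W)
assert prob_reshape.shape == (1, 18, 38, 62)
```

Caffe's Reshape only reinterprets the blob in row-major order, which is exactly what NumPy's reshape on a C-contiguous array does.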

Layer (blob) name and output shape:

data        (1, 3, 600, 989)
im_info        (1, 3)
conv1_1        (1, 64, 600, 989)
conv1_2        (1, 64, 600, 989)
pool1        (1, 64, 300, 495)
conv2_1        (1, 128, 300, 495)
conv2_2        (1, 128, 300, 495)
pool2        (1, 128, 150, 248)
conv3_1        (1, 256, 150, 248)
conv3_2        (1, 256, 150, 248)
conv3_3        (1, 256, 150, 248)
pool3        (1, 256, 75, 124)
conv4_1        (1, 512, 75, 124)
conv4_2        (1, 512, 75, 124)
conv4_3        (1, 512, 75, 124)
pool4        (1, 512, 38, 62)
conv5_1        (1, 512, 38, 62)
conv5_2        (1, 512, 38, 62)
conv5_3        (1, 512, 38, 62)
conv5_3_relu5_3_0_split_0        (1, 512, 38, 62)
conv5_3_relu5_3_0_split_1        (1, 512, 38, 62)
rpn/output        (1, 512, 38, 62)
rpn/output_rpn_relu/3x3_0_split_0        (1, 512, 38, 62)
rpn/output_rpn_relu/3x3_0_split_1        (1, 512, 38, 62)
rpn_cls_score        (1, 18, 38, 62)
rpn_bbox_pred        (1, 36, 38, 62)
rpn_cls_score_reshape        (1, 2, 342, 62)
rpn_cls_prob        (1, 2, 342, 62)
rpn_cls_prob_reshape        (1, 18, 38, 62)
rois        (300, 5)
pool5        (300, 512, 7, 7)
fc6        (300, 4096)
fc7        (300, 4096)
fc7_relu7_0_split_0        (300, 4096)
fc7_relu7_0_split_1        (300, 4096)
cls_score        (300, 21)
bbox_pred        (300, 84)
cls_prob        (300, 21)
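The spatial sizes in this listing follow directly from the architecture: pad-1 3×3 convolutions preserve height and width, and each of the four 2×2/stride-2 max pools halves them, with Caffe rounding pooled output sizes up. A quick sanity check of that arithmetic (a sketch, not Caffe's shape-inference code):

```python
import math

def vgg16_conv5_size(h, w, n_pools=4):
    """Conv layers with pad=1, kernel=3 preserve size; each 2x2/stride-2
    max pool halves it, with Caffe's ceil rounding."""
    for _ in range(n_pools):
        h = math.ceil(h / 2)
        w = math.ceil(w / 2)
    return h, w

# 600x989 input -> 38x62 at conv5_3, matching the listing above
assert vgg16_conv5_size(600, 989) == (38, 62)
```

This is also why the proposal layer's feat_stride is 16: four halvings give an effective stride of 2^4.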




Layer name and parameter (W, b) shapes:

conv1_1        (64, 3, 3, 3) (64,)
conv1_2        (64, 64, 3, 3) (64,)
conv2_1        (128, 64, 3, 3) (128,)
conv2_2        (128, 128, 3, 3) (128,)
conv3_1        (256, 128, 3, 3) (256,)
conv3_2        (256, 256, 3, 3) (256,)
conv3_3        (256, 256, 3, 3) (256,)
conv4_1        (512, 256, 3, 3) (512,)
conv4_2        (512, 512, 3, 3) (512,)
conv4_3        (512, 512, 3, 3) (512,)
conv5_1        (512, 512, 3, 3) (512,)
conv5_2        (512, 512, 3, 3) (512,)
conv5_3        (512, 512, 3, 3) (512,)
rpn_conv/3x3        (512, 512, 3, 3) (512,)
rpn_cls_score        (18, 512, 1, 1) (18,)
rpn_bbox_pred        (36, 512, 1, 1) (36,)
fc6        (4096, 25088) (4096,)
fc7        (4096, 4096) (4096,)
cls_score        (21, 4096) (21,)
bbox_pred        (84, 4096) (84,)

Forward-pass result blobs_out:
['bbox_pred', 'cls_prob'] (300, 84) (300, 21)
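A few of these shapes encode the model's key constants: fc6's input width 25088 is the flattened (512, 7, 7) RoI-pooled feature, cls_score's 21 outputs are 20 PASCAL VOC classes plus background, and bbox_pred's 84 outputs are 4 box deltas per class. Checking the arithmetic (a sketch; the shapes come from the listing above, the class breakdown is the usual VOC assumption):

```python
# Sanity checks on the parameter shapes listed above.
n_classes = 21            # assumed: 20 PASCAL VOC classes + background
pooled_w = pooled_h = 7   # roi_pool5 output size
conv5_channels = 512

# fc6 weight is (4096, 25088): it consumes the flattened (512, 7, 7) RoI feature
assert conv5_channels * pooled_h * pooled_w == 25088

# bbox_pred regresses 4 box deltas per class
assert 4 * n_classes == 84

# fc6 alone dominates the parameter count (weights + biases)
fc6_params = 4096 * 25088 + 4096
print(fc6_params)  # 102764544
```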


Analysis of the key layers
layer {
  name: 'proposal'
  type: 'Python'
  bottom: 'rpn_cls_prob_reshape'
  bottom: 'rpn_bbox_pred'
  bottom: 'im_info'
  top: 'rois'
  python_param {
    module: 'rpn.proposal_layer'
    layer: 'ProposalLayer'
    param_str: "'feat_stride': 16"
  }
}
Inputs:
rpn_cls_prob_reshape        (1, 18, 38, 62)
rpn_bbox_pred        (1, 36, 38, 62)
im_info        (1, 3)
Outputs:
rois        (300, 5)
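These shapes also pin down the anchor bookkeeping: 18 = 2 (bg/fg) × 9 anchors, 36 = 4 box deltas × 9 anchors, and the proposal layer scores every anchor at every conv5_3 location before NMS trims the list to the 300 RoIs seen above. A sketch of the counts:

```python
A = 9            # anchors per location: 3 scales x 3 aspect ratios
H, W = 38, 62    # conv5_3 / rpn output spatial size

assert 2 * A == 18        # rpn_cls_score channels (bg/fg score per anchor)
assert 4 * A == 36        # rpn_bbox_pred channels (dx,dy,dw,dh per anchor)

total_anchors = H * W * A
print(total_anchors)      # 21204 candidate boxes before NMS

post_nms_top_n = 300      # ProposalLayer keeps the top 300 -> rois (300, 5)
# each roi row is (batch_index, x1, y1, x2, y2)
```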

class ProposalLayer(caffe.Layer) is where the 9 kinds of anchor boxes are generated:

generate_anchors(base_size=16, ratios=[0.5, 1, 2],
                     scales=np.array([8, 16, 32]))

ratios is the aspect ratio H/W:
    # ws: the width is the square root of the area divided by the ratio,
    # since the anchor is a rectangle, not necessarily a square
    ws = np.round(np.sqrt(size_ratios))
    # height = width × ratio (ratios is H/W); np.round rounds to the nearest integer
    hs = np.round(ws * ratios)
scales is the enlargement factor:
ws = w * scales
hs = h * scales
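Putting the two steps together: generate_anchors first varies the aspect ratio at a fixed area, then enlarges each ratio anchor by the three scales. Below is a condensed NumPy re-derivation of that logic (a sketch following the published py-faster-rcnn behavior, not the verbatim source):

```python
import numpy as np

def whctrs(anchor):
    """Return width, height, center x, center y of an (x1, y1, x2, y2) anchor."""
    w = anchor[2] - anchor[0] + 1
    h = anchor[3] - anchor[1] + 1
    return w, h, anchor[0] + 0.5 * (w - 1), anchor[1] + 0.5 * (h - 1)

def mkanchors(ws, hs, x_ctr, y_ctr):
    """Build (x1, y1, x2, y2) anchors around a center from width/height vectors."""
    ws, hs = ws[:, None], hs[:, None]
    return np.hstack((x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1),
                      x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1)))

def generate_anchors(base_size=16, ratios=(0.5, 1, 2), scales=(8, 16, 32)):
    base = np.array([0, 0, base_size - 1, base_size - 1], dtype=float)
    w, h, x_ctr, y_ctr = whctrs(base)
    size = w * h                                     # area of the base window
    ws = np.round(np.sqrt(size / np.array(ratios)))  # keep area, vary ratio
    hs = np.round(ws * np.array(ratios))             # ratio = h / w
    ratio_anchors = mkanchors(ws, hs, x_ctr, y_ctr)
    # enlarge each ratio anchor by every scale
    out = []
    for a in ratio_anchors:
        w, h, x_ctr, y_ctr = whctrs(a)
        out.append(mkanchors(w * np.array(scales, dtype=float),
                             h * np.array(scales, dtype=float), x_ctr, y_ctr))
    return np.vstack(out)

anchors = generate_anchors()
assert anchors.shape == (9, 4)   # 3 ratios x 3 scales
print(anchors[0])                # [-84. -40.  99.  55.]
```

At each of the 38×62 feature-map positions, these 9 templates are shifted by feat_stride = 16 to cover the input image.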

layer {
  name: "roi_pool5"
  type: "ROIPooling"
  bottom: "conv5_3"
  bottom: "rois"
  top: "pool5"
  roi_pooling_param {
    pooled_w: 7
    pooled_h: 7
    spatial_scale: 0.0625 # 1/16
  }
}
Inputs:
conv5_3        (1, 512, 38, 62)
rois        (300, 5)
Output:
pool5        (300, 512, 7, 7)
RoI pooling in summary:
(1) it is used for object detection tasks;
(2) it lets us reuse the CNN feature map across all regions;
(3) it significantly speeds up both training and testing;
(4) it allows the object detection system to be trained end-to-end.
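The forward pass can be sketched in NumPy: each RoI is projected onto conv5_3 with spatial_scale = 1/16, divided into a 7×7 grid of bins, and each bin is max-pooled. This is a simplified sketch (it ignores backprop and some of the Caffe layer's edge handling):

```python
import numpy as np

def roi_pool(feat, rois, pooled_h=7, pooled_w=7, spatial_scale=1.0 / 16):
    """feat: (C, H, W) conv feature map; rois: (R, 5) rows of
    (batch_idx, x1, y1, x2, y2) in input-image coordinates."""
    C, H, W = feat.shape
    out = np.zeros((len(rois), C, pooled_h, pooled_w), dtype=feat.dtype)
    for r, roi in enumerate(rois):
        # project image-space roi corners onto the feature map (stride 16)
        x1, y1, x2, y2 = (int(np.round(v * spatial_scale)) for v in roi[1:])
        bin_h = max(y2 - y1 + 1, 1) / pooled_h
        bin_w = max(x2 - x1 + 1, 1) / pooled_w
        for ph in range(pooled_h):
            for pw in range(pooled_w):
                # floor/ceil bin boundaries, clipped to the feature map
                hs = min(max(y1 + int(np.floor(ph * bin_h)), 0), H)
                he = min(max(y1 + int(np.ceil((ph + 1) * bin_h)), 0), H)
                ws = min(max(x1 + int(np.floor(pw * bin_w)), 0), W)
                we = min(max(x1 + int(np.ceil((pw + 1) * bin_w)), 0), W)
                if he > hs and we > ws:
                    out[r, :, ph, pw] = feat[:, hs:he, ws:we].max(axis=(1, 2))
    return out

feat = np.random.rand(512, 38, 62).astype(np.float32)
rois = np.array([[0, 0, 0, 320, 240]], dtype=np.float32)
assert roi_pool(feat, rois).shape == (1, 512, 7, 7)
```

Because every RoI comes out as a fixed (512, 7, 7) block, all 300 proposals can be batched straight into fc6 regardless of their original sizes.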








[Attached figures: faster rcnn网络结构01.png, faster rcnn网络结构02.png — diagrams of the Faster R-CNN network structure]