# Leaky variant of ReLU.
def leak_relu(self, x, alpha=0.1):
    """Return the element-wise maximum of `x` and `alpha * x`.

    With the default alpha of 0.1, positive inputs pass through unchanged
    and negative inputs are scaled by alpha.
    """
    scaled = alpha * x
    return tf.maximum(scaled, x)
# Build the network graph.
def _build_net(self):
    """Assemble the detection network under the 'yolo' variable scope.

    The graph is 24 convolution layers interleaved with four 2x2 max-pools,
    followed by a flatten and three fully connected layers.

    Returns:
        A `(net, x)` pair: `net` is the output of the last fully connected
        layer (7*7*30 values per image, not reshaped here) and `x` is the
        float32 input placeholder of shape [None, 448, 448, 3].
    """
    # Marker for a 2x2 / stride-2 max-pool entry in the table below.
    POOL = None
    # Convolution entries are (scope, num_filters, filter_size, stride).
    # Shape comments give the per-image (H, W, C) after the entry.
    backbone = [
        ('conv_2', 64, 7, 2),      # (448,448,3) -> (224,224,64)
        POOL,                      # (112,112,64)
        ('conv_4', 192, 3, 1),     # (112,112,192)
        POOL,                      # (56,56,192)
        ('conv_6', 128, 1, 1),     # (56,56,128)
        ('conv_7', 256, 3, 1),     # (56,56,256)
        ('conv_8', 256, 1, 1),     # (56,56,256)
        ('conv_9', 512, 3, 1),     # (56,56,512)
        POOL,                      # (28,28,512)
        ('conv_11', 256, 1, 1),
        ('conv_12', 512, 3, 1),
        ('conv_13', 256, 1, 1),
        ('conv_14', 512, 3, 1),
        ('conv_15', 256, 1, 1),
        ('conv_16', 512, 3, 1),
        ('conv_17', 256, 1, 1),
        ('conv_18', 512, 3, 1),
        ('conv_19', 512, 1, 1),
        ('conv_20', 1024, 3, 1),   # (28,28,1024)
        POOL,                      # (14,14,1024)
        ('conv_22', 512, 1, 1),
        ('conv_23', 1024, 3, 1),
        ('conv_24', 512, 1, 1),
        ('conv_25', 1024, 3, 1),
        ('conv_26', 1024, 3, 1),
        ('conv_28', 1024, 3, 2),   # stride 2: (7,7,1024)
        ('conv_29', 1024, 3, 1),
        ('conv_30', 1024, 3, 1),
    ]
    # Fully connected entries are (scope, num_out, activation).
    dense_head = [
        ('fc_33', 512, self.leak_relu),
        ('fc_34', 4096, self.leak_relu),
        ('fc_36', 7*7*30, None),   # final layer has no activation
    ]

    x = tf.placeholder(tf.float32, [None, 448, 448, 3])
    net = x
    with tf.variable_scope('yolo'):
        for spec in backbone:
            if spec is POOL:
                net = self._maxpool_layer(net, 2, 2)
                continue
            scope, num_filters, filter_size, stride = spec
            with tf.variable_scope(scope):
                net = self._conv_layer(net, num_filters, filter_size, stride, scope)
        net = self._flatten(net)
        for scope, num_out, activation in dense_head:
            with tf.variable_scope(scope):
                net = self._fc_layer(net, num_out, activation=activation, scope=scope)
    # Hand back both the prediction tensor and the input placeholder.
    return net, x
# Build one convolution layer.
def _conv_layer(self, x, num_filters, filter_size, stride, scope):
    """Explicitly pad `x`, convolve it, add a bias and apply leaky ReLU.

    Args:
        x: input tensor; its last dimension is read as the channel count.
        num_filters: number of output channels.
        filter_size: side length of the square kernel.
        stride: stride used on both middle (spatial) axes.
        scope: name given to the conv2d op.

    Returns:
        The activated convolution output.
    """
    in_channels = x.get_shape().as_list()[-1]
    # Kernel weights and per-filter bias.
    kernel_shape = [filter_size, filter_size, in_channels, num_filters]
    weight = tf.Variable(tf.truncated_normal(kernel_shape, stddev=0.1), name='weights')
    bias = tf.Variable(tf.zeros([num_filters,]), name='biases')
    # Pad filter_size // 2 on each side of the two middle axes by hand,
    # so the convolution itself can run with VALID padding.
    border = filter_size // 2
    paddings = np.array([[0, 0], [border, border], [border, border], [0, 0]])
    padded = tf.pad(x, paddings)
    conv = tf.nn.conv2d(padded, weight, strides=[1, stride, stride, 1], padding="VALID", name=scope)
    # Leaky ReLU on top of conv + bias.
    return self.leak_relu(tf.nn.bias_add(conv, bias))
def _fc_layer(self, x, num_out, activation=None, scope=None):
    """Fully connected layer computing `x @ weights + biases`.

    Args:
        x: input tensor; its last dimension is read as the input width.
        num_out: number of output units.
        activation: optional callable applied to the affine output.
        scope: name given to the xw_plus_b op.

    Returns:
        The affine output, passed through `activation` when one is given.
    """
    num_in = x.get_shape().as_list()[-1]
    weight = tf.Variable(tf.truncated_normal([num_in, num_out], stddev=0.1), name='weights')
    bias = tf.Variable(tf.zeros([num_out,]), name='biases')
    affine = tf.nn.xw_plus_b(x, weight, bias, name=scope)
    return activation(affine) if activation else affine
def _maxpool_layer(self, x, pool_size, stride):
    """Max-pool `x` with a square window and SAME padding.

    Args:
        x: input tensor.
        pool_size: side length of the pooling window on the two middle axes.
        stride: stride on the two middle axes.
    """
    window = [1, pool_size, pool_size, 1]
    step = [1, stride, stride, 1]
    return tf.nn.max_pool(x, window, strides=step, padding="SAME")
def _flatten(self, x):
    """Flatten every non-batch dimension of `x` into one axis.

    The tensor is first transposed with the permutation [0, 3, 1, 2]
    (last axis moved to position 1) so the flattened values are laid out
    channel-first.

    Returns:
        A 2-D tensor of shape [-1, product of x's static non-batch dims].
    """
    tran_x = tf.transpose(x, [0, 3, 1, 2])  # channel-first layout
    # np.prod replaces the deprecated alias np.product, which was removed
    # in NumPy 2.0.
    nums = np.prod(x.get_shape().as_list()[1:])
    return tf.reshape(tran_x, [-1, nums])




YOLOv2 使用了一个新的分类网络(Darknet-19)作为特征提取部分:网络使用了较多的 3×3 卷积核,并在每一次池化操作后把通道数翻倍;借鉴 Network in Network 的思想,把 1×1 的卷积核置于 3×3 的卷积核之间,用来压缩特征;同时使用 batch normalization 来稳定模型训练、加速收敛,并对模型起到正则化作用。




def decode(self,net):
    """Decode the raw detection output into boxes, objectness and class scores.

    Args:
        net: raw network output; it is reshaped to
            [batch, 13 * 13, self.num_anchors, self.num_class + 5].

    Returns:
        A `(bboxes, obj_probs, class_probs)` tuple:
        bboxes is [batch, 169, num_anchors, 4] holding
        (x_min, y_min, x_max, y_max) divided by the grid size 13;
        obj_probs is [batch, 169, num_anchors];
        class_probs is [batch, 169, num_anchors, num_class].
    """
    # NOTE(review): this rebinds self.anchor_size to a tensor, so a second
    # call would pass a tensor to tf.constant — confirm decode() is only
    # ever called once per instance.
    self.anchor_size = tf.constant(self.anchor_size,tf.float32)
    # e.g. [batch, 169, 5, 85] when there are 5 anchors and 80 classes.
    net = tf.reshape(net, [-1, 13 * 13, self.num_anchors, self.num_class + 5])
    # Last-axis layout: 0-1 are the xy offsets, 2-3 the wh offsets,
    # 4 the objectness score, 5 onwards the per-class scores.
    # The centre offset is relative to the top-left corner of its cell;
    # sigmoid squashes it into (0, 1).
    xy_offset = tf.nn.sigmoid(net[:, :, :, 0:2])
    wh_offset = tf.exp(net[:, :, :, 2:4])
    obj_probs = tf.nn.sigmoid(net[:, :, :, 4])
    class_probs = tf.nn.softmax(net[:, :, :, 5:])
    # Cell coordinates for every position of the feature map.
    # NOTE(review): the reshape above and the divisions below hard-code
    # 13, so self.feature_map_size is presumably (13, 13) — confirm.
    height_index = tf.range(self.feature_map_size[0], dtype=tf.float32)
    width_index = tf.range(self.feature_map_size[1], dtype=tf.float32)
    x_cell, y_cell = tf.meshgrid(height_index, width_index)
    # [1, H*W, 1] so they broadcast against [batch, H*W, num_anchors].
    x_cell = tf.reshape(x_cell, [1, -1, 1])
    y_cell = tf.reshape(y_cell, [1, -1, 1])
    # Box centre = cell coordinate + offset inside the cell; box size =
    # anchor size scaled by the predicted factor. Everything is divided
    # by the grid size.
    bbox_x = (x_cell + xy_offset[:, :, :, 0]) / 13
    bbox_y = (y_cell + xy_offset[:, :, :, 1]) / 13
    bbox_w = (self.anchor_size[:, 0] * wh_offset[:, :, :, 0]) / 13
    bbox_h = (self.anchor_size[:, 1] * wh_offset[:, :, :, 1]) / 13
    # Fix: the original tf.stack(...) call was never closed. The four
    # corner tensors are 3-D, so stacking on a new last axis (axis=3)
    # yields one 4-vector per (cell, anchor).
    bboxes = tf.stack([bbox_x - bbox_w / 2, bbox_y - bbox_h / 2,
                       bbox_x + bbox_w / 2, bbox_y + bbox_h / 2], axis=3)
    return bboxes, obj_probs, class_probs


    def conv2d(self, x, filters_num, filters_size, pad_size=0, stride=1,
               batch_normalize=True, activation=leaky_relu, use_bias=False,
               name='conv2d'):
        """Optional explicit padding -> VALID conv -> optional BN -> optional activation.

        Args:
            x: input tensor.
            filters_num: number of output filters.
            filters_size: kernel size passed to tf.layers.conv2d.
            pad_size: amount padded on each side of the two middle axes
                before the convolution; 0 disables padding.
            stride: convolution stride.
            batch_normalize: when true, batch-normalize the conv output.
                The BN layer is always built with training=False.
            activation: callable applied last, or a falsy value for none.
            use_bias: whether the conv layer has a bias. Layers followed by
                BN normally leave it off.
            name: conv layer name; the BN layer is named name + '_bn'.
        """
        if pad_size > 0:
            border = [pad_size, pad_size]
            x = tf.pad(x, [[0, 0], border, border, [0, 0]])
        # The convolution runs on the (possibly) padded input with no
        # built-in activation.
        out = tf.layers.conv2d(
            x,
            filters=filters_num,
            kernel_size=filters_size,
            strides=stride,
            padding='VALID',
            activation=None,
            use_bias=use_bias,
            name=name,
        )
        # BN sits between the convolution and the activation.
        if batch_normalize:
            out = tf.layers.batch_normalization(
                out, axis=-1, momentum=0.9, training=False, name=name+'_bn')
        return activation(out) if activation else out
    def maxpool(self, x, size=2, stride=2, name='maxpool'):
        """Max-pool `x` via tf.layers.max_pooling2d with the given window and stride."""
        pooled = tf.layers.max_pooling2d(x, pool_size=size, strides=stride, name=name)
        return pooled
    def passthrough(self, x, stride):
        """Rearrange spatial blocks of size `stride` into the depth axis.

        The feature map becomes spatially smaller and deeper
        (tf.space_to_depth).
        """
        rearranged = tf.space_to_depth(x, block_size=stride)
        return rearranged
    def darknet(self):
        """Build the YOLOv2 backbone and detection head for a 416x416 input.

        Returns:
            An `(output, x)` pair: `output` is the detection tensor with
            self.f_num channels on a 13x13 grid, and `x` is the float32
            input placeholder of shape [None, 416, 416, 3].
        """
        x = tf.placeholder(dtype=tf.float32,shape=[None,416,416,3])
        # 416,416,3 -> 416,416,32
        # Fix: this call was truncated after pad_size=1; the name follows
        # the conv2 / pool1 naming used by the layers below.
        net = self.conv2d(x, filters_num=32, filters_size=3, pad_size=1, name='conv1')
        # 416,416,32 -> 208,208,32
        net = self.maxpool(net, size=2, stride=2, name='pool1')
        # 208,208,32 -> 208,208,64
        net = self.conv2d(net, 64, 3, 1, name='conv2')
        # 208,208,64 -> 104,104,64
        net = self.maxpool(net, 2, 2, name='pool2')
        # 104,104,64 -> 104,104,128
        net = self.conv2d(net, 128, 3, 1, name='conv3_1')
        net = self.conv2d(net, 64, 1, 0, name='conv3_2')
        net = self.conv2d(net, 128, 3, 1, name='conv3_3')
        # 104,104,128 -> 52,52,128
        net = self.maxpool(net, 2, 2, name='pool3')
        # 52,52,128 -> 52,52,256
        net = self.conv2d(net, 256, 3, 1, name='conv4_1')
        net = self.conv2d(net, 128, 1, 0, name='conv4_2')
        net = self.conv2d(net, 256, 3, 1, name='conv4_3')
        # 52,52,256 -> 26,26,256
        net = self.maxpool(net, 2, 2, name='pool4')
        # 26,26,256 -> 26,26,512
        net = self.conv2d(net, 512, 3, 1, name='conv5_1')
        net = self.conv2d(net, 256, 1, 0, name='conv5_2')
        net = self.conv2d(net, 512, 3, 1, name='conv5_3')
        net = self.conv2d(net, 256, 1, 0, name='conv5_4')
        net = self.conv2d(net, 512, 3, 1, name='conv5_5')
        # Keep this 26x26x512 feature map for the passthrough branch below.
        shortcut = net
        # 26,26,512 -> 13,13,512
        net = self.maxpool(net, 2, 2, name='pool5')
        # 13,13,512 -> 13,13,1024
        net = self.conv2d(net, 1024, 3, 1, name='conv6_1')
        net = self.conv2d(net, 512, 1, 0, name='conv6_2')
        net = self.conv2d(net, 1024, 3, 1, name='conv6_3')
        net = self.conv2d(net, 512, 1, 0, name='conv6_4')
        net = self.conv2d(net, 1024, 3, 1, name='conv6_5')
        # Three 3x3 conv layers with 1024 filters are appended here, and the
        # last of them is followed by a 1x1 conv layer with B * (5 + C)
        # filters.
        # For the VOC dataset with a 416x416 input the final output is a
        # 13x13 grid, so the output is 13 * 13 * 5 * (5 + 20) = 13 * 13 * 125.
        # The detection network also adds a passthrough layer that connects
        # the last 26x26x512 conv layer to the second of the newly added 3x3
        # conv layers, giving the model fine-grained features.
        # The part below is mainly "training for detection".
        net = self.conv2d(net, 1024, 3, 1, name='conv7_1')
        # 13,13,1024 -> 13,13,1024
        net = self.conv2d(net, 1024, 3, 1, name='conv7_2')
        # The shortcut gets an extra 1x1 conv with 64 filters before the
        # passthrough: 26x26x512 -> 26x26x64 -> 13x13x256.
        shortcut = self.conv2d(shortcut, 64, 1, 0, name='conv_shortcut')
        shortcut = self.passthrough(shortcut, 2)
        # After concatenation: 13x13x(256+1024).
        net = tf.concat([shortcut, net],axis=-1)
        # The channels are merged with the original features. The
        # passthrough layer resembles a ResNet shortcut: it takes an
        # earlier, higher-resolution feature map and attaches it to the
        # later, lower-resolution one.
        net = self.conv2d(net, 1024, 3, 1, name='conv8')
        # Detection layer: a final 1x1 conv adjusts the channel count, with
        # no BN and no activation, giving S*S*(B*(5+C)); per the original
        # notes that is 13*13*425 here.
        output = self.conv2d(net, filters_num=self.f_num, filters_size=1, batch_normalize=False, activation=None,
                        use_bias=True, name='conv_dec')
        return output,x





1、使用了残差网络(Residual)。残差卷积就是先进行一次 3×3 的卷积并保存该卷积的输出 layer,再进行一次 1×1 的卷积和一次 3×3 的卷积,并把这个结果加上 layer 作为最后的结果。残差网络的特点是容易优化,并且能够通过增加相当的深度来提高准确率。其内部的残差块使用了跳跃连接,缓解了在深度神经网络中增加深度带来的梯度消失问题。



# Batch normalization followed by leaky ReLU.
def _batch_normalization_layer(self, input_layer, name = None, training = True, norm_decay = 0.99, norm_epsilon = 1e-3):
    """Batch-normalize a feature map and apply leaky ReLU (alpha = 0.1).

    Args:
        input_layer: input tensor (described as 4-D in the original notes).
        name: name of the batch-norm layer.
        training: whether the layer runs in training mode.
        norm_decay: passed as `momentum`, the decay of the moving averages
            used at prediction time.
        norm_epsilon: small constant added to the variance to avoid
            division by zero.

    Returns:
        The batch-normalized feature map after the leaky ReLU.
    """
    bn_layer = tf.layers.batch_normalization(inputs = input_layer,
        momentum = norm_decay, epsilon = norm_epsilon, center = True,
        scale = True, training = training, name = name)
    return tf.nn.leaky_relu(bn_layer, alpha = 0.1)
# Plain convolution layer (no batch norm, no activation).
def _conv2d_layer(self, inputs, filters_num, kernel_size, name, use_bias = False, strides = 1):
    """Create a tf.layers.conv2d layer with Glorot init and L2 regularization.

    Padding is 'SAME' when `strides == 1` and 'VALID' otherwise. This
    function does not pad by itself: for a strided convolution the caller
    is expected to pad first, as `_Residual_block` does, so that for
    example a 416x416 input with a 3x3 kernel and stride 2 comes out as
    208x208. Batch norm and leaky ReLU are applied separately by the
    callers via `_batch_normalization_layer`.

    Args:
        inputs: input tensor.
        filters_num: number of filters.
        kernel_size: kernel size.
        name: name of the conv layer.
        use_bias: whether to use a bias term.
        strides: stride applied to both spatial dimensions.

    Returns:
        The feature map produced by the convolution.
    """
    conv = tf.layers.conv2d(
        inputs = inputs, filters = filters_num,
        kernel_size = kernel_size, strides = [strides, strides], kernel_initializer = tf.glorot_uniform_initializer(),
        padding = ('SAME' if strides == 1 else 'VALID'), kernel_regularizer = tf.contrib.layers.l2_regularizer(scale = 5e-4), use_bias = use_bias, name = name)
    return conv
# Residual stage: one 3x3 conv whose output is saved, then a 1x1 conv and
# a 3x3 conv whose result is added back onto the saved output.
def _Residual_block(self, inputs, filters_num, blocks_num, conv_index, training = True, norm_decay = 0.99, norm_epsilon = 1e-3):
    """Downsample with a stride-2 conv, then apply `blocks_num` residual units.

    Args:
        inputs: input tensor.
        filters_num: number of filters of the 3x3 convolutions; the 1x1
            convolutions use filters_num // 2.
        blocks_num: number of residual units.
        conv_index: running layer number used to name layers consistently,
            which makes loading pretrained weights easier.
        training: whether the batch-norm layers run in training mode.
        norm_decay: decay of the moving averages used at prediction time.
        norm_epsilon: small constant added to the variance to avoid
            division by zero.

    Returns:
        A `(layer, conv_index)` pair: the output of the last residual unit
        and the next unused layer number.
    """
    # Pad one row on top and one column on the left so the stride-2 VALID
    # 3x3 convolution below halves the spatial size (e.g. 416 -> 208).
    inputs = tf.pad(inputs, paddings=[[0, 0], [1, 0], [1, 0], [0, 0]], mode='CONSTANT')
    layer = self._conv2d_layer(inputs, filters_num, kernel_size = 3, strides = 2, name = "conv2d_" + str(conv_index))
    layer = self._batch_normalization_layer(layer, name = "batch_normalization_" + str(conv_index), training = training, norm_decay = norm_decay, norm_epsilon = norm_epsilon)
    conv_index += 1
    for _ in range(blocks_num):
        # Save the unit's input for the skip connection.
        shortcut = layer
        layer = self._conv2d_layer(layer, filters_num // 2, kernel_size = 1, strides = 1, name = "conv2d_" + str(conv_index))
        layer = self._batch_normalization_layer(layer, name = "batch_normalization_" + str(conv_index), training = training, norm_decay = norm_decay, norm_epsilon = norm_epsilon)
        conv_index += 1
        layer = self._conv2d_layer(layer, filters_num, kernel_size = 3, strides = 1, name = "conv2d_" + str(conv_index))
        layer = self._batch_normalization_layer(layer, name = "batch_normalization_" + str(conv_index), training = training, norm_decay = norm_decay, norm_epsilon = norm_epsilon)
        conv_index += 1
        layer += shortcut
    return layer, conv_index
# Build the Darknet-53 feature extractor.
def _darknet53(self, inputs, conv_index, training = True, norm_decay = 0.99, norm_epsilon = 1e-3):
    """Build the 52 convolution layers of Darknet-53 under scope 'darknet53'.

    Args:
        inputs: model input tensor.
        conv_index: running layer number, used so layers can be matched by
            name when loading pretrained weights.
        training: whether the batch-norm layers run in training mode.
        norm_decay: decay of the moving averages used at prediction time.
        norm_epsilon: small constant added to the variance to avoid
            division by zero.

    Returns:
        A `(route1, route2, conv, conv_index)` tuple. For a 416x416x3 input:
        route1 is the output of the 26th conv layer (52x52x256),
        route2 is the output of the 43rd conv layer (26x26x512),
        conv is the output after all 52 conv layers (13x13x1024),
        conv_index is the next unused layer number.
    """
    with tf.variable_scope('darknet53'):
        # 416,416,3 -> 416,416,32
        conv = self._conv2d_layer(inputs, filters_num = 32, kernel_size = 3, strides = 1, name = "conv2d_" + str(conv_index))
        conv = self._batch_normalization_layer(conv, name = "batch_normalization_" + str(conv_index), training = training, norm_decay = norm_decay, norm_epsilon = norm_epsilon)
        conv_index += 1
        # 416,416,32 -> 208,208,64
        conv, conv_index = self._Residual_block(conv, conv_index = conv_index, filters_num = 64, blocks_num = 1, training = training, norm_decay = norm_decay, norm_epsilon = norm_epsilon)
        # 208,208,64 -> 104,104,128
        conv, conv_index = self._Residual_block(conv, conv_index = conv_index, filters_num = 128, blocks_num = 2, training = training, norm_decay = norm_decay, norm_epsilon = norm_epsilon)
        # 104,104,128 -> 52,52,256
        conv, conv_index = self._Residual_block(conv, conv_index = conv_index, filters_num = 256, blocks_num = 8, training = training, norm_decay = norm_decay, norm_epsilon = norm_epsilon)
        # route1 = 52,52,256
        route1 = conv
        # 52,52,256 -> 26,26,512
        conv, conv_index = self._Residual_block(conv, conv_index = conv_index, filters_num = 512, blocks_num = 8, training = training, norm_decay = norm_decay, norm_epsilon = norm_epsilon)
        # route2 = 26,26,512
        route2 = conv
        # 26,26,512 -> 13,13,1024
        conv, conv_index = self._Residual_block(conv, conv_index = conv_index,  filters_num = 1024, blocks_num = 4, training = training, norm_decay = norm_decay, norm_epsilon = norm_epsilon)
        # conv (the third route) = 13,13,1024
    return  route1, route2, conv, conv_index
# Produces two results:
# - the output after 5 convolutions (1x1, 3x3, 1x1, 3x3, 1x1), which feeds
#   the next upsampling branch;
# - the output after 5 + 2 convolutions (1x1, 3x3, 1x1, 3x3, 1x1, 3x3, 1x1),
#   which is used as one feature layer.
def _yolo_block(self, inputs, filters_num, out_filters, conv_index, training = True, norm_decay = 0.99, norm_epsilon = 1e-3):
    """Detection block stacked on the Darknet-53 features.

    YOLOv3 adds one such block for each of three feature-map scales to
    improve detection of small objects.

    Args:
        inputs: input features.
        filters_num: filters of the 1x1 convolutions; the 3x3 convolutions
            use filters_num * 2.
        out_filters: number of filters of the final output convolution.
        conv_index: running layer number, used so layers can be matched by
            name when loading pretrained weights.
        training: whether the batch-norm layers run in training mode.
        norm_decay: decay of the moving averages used at prediction time.
        norm_epsilon: small constant added to the variance to avoid
            division by zero.

    Returns:
        A `(route, conv, conv_index)` tuple: `route` is the output of the
        fifth conv + BN stage, `conv` is the output of the final
        convolution, and `conv_index` is the next unused layer number.
    """
    conv = inputs
    # Five alternating conv + BN stages: 1x1 with filters_num, 3x3 with
    # filters_num * 2.
    for kernel_size in (1, 3, 1, 3, 1):
        stage_filters = filters_num if kernel_size == 1 else filters_num * 2
        conv = self._conv2d_layer(conv, filters_num = stage_filters, kernel_size = kernel_size, strides = 1, name = "conv2d_" + str(conv_index))
        conv = self._batch_normalization_layer(conv, name = "batch_normalization_" + str(conv_index), training = training, norm_decay = norm_decay, norm_epsilon = norm_epsilon)
        conv_index += 1
    # Branch point reused by the caller for the next (upsampled) scale.
    route = conv
    conv = self._conv2d_layer(conv, filters_num = filters_num * 2, kernel_size = 3, strides = 1, name = "conv2d_" + str(conv_index))
    conv = self._batch_normalization_layer(conv, name = "batch_normalization_" + str(conv_index), training = training, norm_decay = norm_decay, norm_epsilon = norm_epsilon)
    conv_index += 1
    # Output convolution: it has a bias and is not followed by batch norm
    # or an activation.
    conv = self._conv2d_layer(conv, filters_num = out_filters, kernel_size = 1, strides = 1, name = "conv2d_" + str(conv_index), use_bias = True)
    conv_index += 1
    return route, conv, conv_index
# Returns the three feature layers.
def yolo_inference(self, inputs, num_anchors, num_classes, training = True):
    """Build the full YOLOv3 graph and return its three output feature maps.

    Reads `self.norm_decay` and `self.norm_epsilon` for the batch-norm
    layers.

    Args:
        inputs: model input tensor.
        num_anchors: number of anchors each grid cell is responsible for.
        num_classes: number of classes.
        training: whether the batch-norm layers run in training mode.

    Returns:
        `[conv2d_59, conv2d_67, conv2d_75]`, each with
        num_anchors * (num_classes + 5) channels. For a 416x416 input
        their grids are 13x13, 26x26 and 52x52.
    """
    conv_index = 1
    # conv2d_26 = 52,52,256; conv2d_43 = 26,26,512; conv = 13,13,1024
    conv2d_26, conv2d_43, conv, conv_index = self._darknet53(inputs, conv_index, training = training, norm_decay = self.norm_decay, norm_epsilon = self.norm_epsilon)
    with tf.variable_scope('yolo'):
        # First feature layer.
        # conv2d_57 = 13,13,512; conv2d_59 = 13,13,255 (e.g. 3 x (80 + 5))
        conv2d_57, conv2d_59, conv_index = self._yolo_block(conv, 512, num_anchors * (num_classes + 5), conv_index = conv_index, training = training, norm_decay = self.norm_decay, norm_epsilon = self.norm_epsilon)
        # Second feature layer.
        conv2d_60 = self._conv2d_layer(conv2d_57, filters_num = 256, kernel_size = 1, strides = 1, name = "conv2d_" + str(conv_index))
        conv2d_60 = self._batch_normalization_layer(conv2d_60, name = "batch_normalization_" + str(conv_index),training = training, norm_decay = self.norm_decay, norm_epsilon = self.norm_epsilon)
        conv_index += 1
        # unSample_0 = 26,26,256
        # Fix: the new width is taken from axis 2; the original used axis 1
        # for both height and width, which is only right for square maps.
        unSample_0 = tf.image.resize_nearest_neighbor(conv2d_60, [2 * tf.shape(conv2d_60)[1], 2 * tf.shape(conv2d_60)[2]], name='upSample_0')
        # route0 = 26,26,768
        route0 = tf.concat([unSample_0, conv2d_43], axis = -1, name = 'route_0')
        # conv2d_65 = 26,26,256; conv2d_67 = 26,26,255
        conv2d_65, conv2d_67, conv_index = self._yolo_block(route0, 256, num_anchors * (num_classes + 5), conv_index = conv_index, training = training, norm_decay = self.norm_decay, norm_epsilon = self.norm_epsilon)
        # Third feature layer.
        conv2d_68 = self._conv2d_layer(conv2d_65, filters_num = 128, kernel_size = 1, strides = 1, name = "conv2d_" + str(conv_index))
        conv2d_68 = self._batch_normalization_layer(conv2d_68, name = "batch_normalization_" + str(conv_index), training=training, norm_decay=self.norm_decay, norm_epsilon = self.norm_epsilon)
        conv_index += 1
        # unSample_1 = 52,52,128 (same height/width fix as above)
        unSample_1 = tf.image.resize_nearest_neighbor(conv2d_68, [2 * tf.shape(conv2d_68)[1], 2 * tf.shape(conv2d_68)[2]], name='upSample_1')
        # route1 = 52,52,384
        route1 = tf.concat([unSample_1, conv2d_26], axis = -1, name = 'route_1')
        # conv2d_75 = 52,52,255
        _, conv2d_75, _ = self._yolo_block(route1, 128, num_anchors * (num_classes + 5), conv_index = conv_index, training = training, norm_decay = self.norm_decay, norm_epsilon = self.norm_epsilon)
    return [conv2d_59, conv2d_67, conv2d_75]












# ============================= Network ============================= #
#   Called by the net function of SSDNet to build the network.
#   Returns predictions, localisations, logits, end_points.
# NOTE(review): this snippet is truncated in three places and does not
# parse as written: the parameter list below stops after `inputs`, the
# docstring is never closed, and the ssd_multibox_layer(...) call near the
# end is cut off. The body reads scope, reuse, feat_layers,
# dropout_keep_prob and is_training, which presumably belong to the
# missing parameters — restore them from the original source.
def ssd_net(inputs,
    """SSD net definition.
    # Build the network.
    end_points = {}
    with tf.variable_scope(scope, 'ssd_300_vgg', [inputs], reuse=reuse):
        # Block1
        # NOTE(review): the two self.conv2d lines below use `self` and `x`,
        # which nothing visible here defines, and their result is
        # overwritten by slim.repeat right after. They look like a
        # hand-written expansion of that slim.repeat call — confirm and
        # remove. The same applies to the self.conv2d lines in blocks 2
        # and 3.
        net = self.conv2d(x,64,[3,3],scope = 'conv1_1')
        net = self.conv2d(net,64,[3,3],scope = 'conv1_2')
        # (300,300,3) -> (300,300,64) -> (150,150,64)
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = self.conv2d(net,128,[3,3],scope = 'conv2_1')
        net = self.conv2d(net,128,[3,3],scope = 'conv2_2')
        # (150,150,64) -> (150,150,128) -> (75,75,128)
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = self.conv2d(net,256,[3,3],scope = 'conv3_1')
        net = self.conv2d(net,256,[3,3],scope = 'conv3_2')
        net = self.conv2d(net,256,[3,3],scope = 'conv3_3')
        # (75,75,128) -> (75,75,256) -> (38,38,256)
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2],stride = 2,padding = "SAME", scope='pool3')
        # Block 4.
        # Three convolutions.
        # (38,38,256) -> (38,38,512) -> block4_net -> (19,19,512)
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net
        net = slim.max_pool2d(net, [2, 2],padding = "SAME", scope='pool4')
        # Block 5.
        # Three convolutions.
        # (19,19,512)->(19,19,512)
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1,padding = "SAME", scope='pool5')
        # Block 6: dilate
        # Dilated (atrous) convolution.
        # (19,19,512)->(19,19,1024)
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        # NOTE(review): tf.layers.dropout's `rate` is the fraction of units
        # dropped, but the value passed is named dropout_keep_prob —
        # confirm which meaning is intended (also for the dropout after
        # block 7).
        net = tf.layers.dropout(net, rate=dropout_keep_prob, training=is_training)
        # Block 7: 1x1 conv
        # (19,19,1024)->(19,19,1024)
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net
        net = tf.layers.dropout(net, rate=dropout_keep_prob, training=is_training)
        # Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2 (except lasts).
        # (19,19,1024)->(19,19,256)->(10,10,512)
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        # (10,10,512)->(10,10,128)->(5,5,256)
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        # (5,5,256)->(5,5,128)->(3,3,256)
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        # (3,3,256)->(1,1,256)
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        # Prediction and localisation layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_multibox_layer(end_points[layer],
        # NOTE(review): the call above is cut off, and nothing visible
        # appends p / l to predictions, logits or localisations before the
        # return — the rest of the loop body is missing from this snippet.
        return predictions, localisations, logits, end_points
# Attach the default input resolution to the function object.
ssd_net.default_image_size = 300






