Stacked Hourglass Networks


The Stacked Hourglass Network, designed by a research team from the University of Michigan, is a network architecture built specifically for human pose estimation. It once sat at the top of the MPII leaderboard and currently ranks seventh, and several of the methods now ranked above it borrow and refine the hourglass structure, so the hourglass design can fairly be said to have won the field's recognition.


As shown in the figure below, the stacked hourglass network first applies convolution and pooling, downsampling several times to obtain low-resolution features and keep the computational cost down. It then upsamples several times to raise the feature resolution back up, which gives the network the spatial precision to localize body parts accurately. By enlarging the receptive field in this way, the architecture captures more contextual information than comparable networks.
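As a back-of-the-envelope sketch of the resolution bookkeeping (the 64x64 working resolution and four pooling steps are assumptions that happen to match the code further below):

def shape_trace(res=64, depth=4):
    down = [res // 2 ** i for i in range(depth + 1)]   # each pooling step halves the map
    return down, down[::-1]                            # upsampling mirrors the path back

print(shape_trace())  # ([64, 32, 16, 8, 4], [4, 8, 16, 32, 64])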


Each stack contains one hourglass module, whose structure is shown in the figure below.



Each hourglass contains several residual modules; in the hourglass pictured above there are four. The structure of the residual module is shown in the figure below: along the solid line the input is first downsampled and then upsampled, while the dashed line applies either a convolution at the original scale or no operation at all; the outputs of the two paths are added together at the end.



Implementing the hourglass in TensorFlow (the code below uses the TF 1.x API and is adapted from the implementation in reference 4):

First, define some basic building-block functions:

import numpy as np
import tensorflow as tf

# `training` is a boolean flag (or boolean placeholder) that must be defined
# at graph-construction time; it switches batch norm and dropout between
# training and inference behaviour.

def conv_bn_relu(inputs, filters, kernel_size = 1, strides = 1, pad = 'VALID', name = 'conv_bn_relu'):
    with tf.name_scope(name):
        # Xavier-initialized kernel: [k, k, in_channels, out_channels]
        kernel = tf.Variable(tf.contrib.layers.xavier_initializer(uniform=False)([kernel_size, kernel_size, inputs.get_shape().as_list()[3], filters]), name='weights')
        conv = tf.nn.conv2d(inputs, kernel, [1, strides, strides, 1], padding=pad, data_format='NHWC')
        norm = tf.contrib.layers.batch_norm(conv, 0.9, epsilon=1e-5, activation_fn=tf.nn.relu, is_training=training)
        return norm


def conv_block(inputs, numOut, name = 'conv_block'):
    # Pre-activation bottleneck: BN-ReLU + 1x1 conv down to numOut/2 channels,
    # BN-ReLU + padded 3x3 conv, then BN-ReLU + 1x1 conv back up to numOut.
    with tf.name_scope(name):
        with tf.name_scope('norm_1'):
            norm_1 = tf.contrib.layers.batch_norm(inputs, 0.9, epsilon=1e-5, activation_fn=tf.nn.relu, is_training=training)
            conv_1 = conv2d(norm_1, int(numOut/2), kernel_size=1, strides=1, pad='VALID', name='conv')
        with tf.name_scope('norm_2'):
            norm_2 = tf.contrib.layers.batch_norm(conv_1, 0.9, epsilon=1e-5, activation_fn=tf.nn.relu, is_training=training)
            pad = tf.pad(norm_2, np.array([[0,0],[1,1],[1,1],[0,0]]), name='pad')  # keep the 3x3 conv size-preserving
            conv_2 = conv2d(pad, int(numOut/2), kernel_size=3, strides=1, pad='VALID', name='conv')
        with tf.name_scope('norm_3'):
            norm_3 = tf.contrib.layers.batch_norm(conv_2, 0.9, epsilon=1e-5, activation_fn=tf.nn.relu, is_training=training)
            conv_3 = conv2d(norm_3, int(numOut), kernel_size=1, strides=1, pad='VALID', name='conv')
        return conv_3


def conv2d(inputs, filters, kernel_size = 1, strides = 1, pad = 'VALID', name = 'conv'):
    # Plain convolution (no batch norm, no activation) with Xavier init.
    with tf.name_scope(name):
        kernel = tf.Variable(tf.contrib.layers.xavier_initializer(uniform=False)([kernel_size, kernel_size, inputs.get_shape().as_list()[3], filters]), name='weights')
        conv = tf.nn.conv2d(inputs, kernel, [1, strides, strides, 1], padding=pad, data_format='NHWC')
        return conv


Next, define the residual operation:

def residual(inputs, numOut, name = 'residual_block'):
    # Convolutional branch plus skip branch, summed element-wise.
    with tf.name_scope(name):
        convb = conv_block(inputs, numOut)
        skipl = skip_layer(inputs, numOut)
        return tf.add_n([convb, skipl], name='res_block')


def skip_layer(inputs, numOut, name = 'skip_layer'):
    with tf.name_scope(name):
        if inputs.get_shape().as_list()[3] == numOut:
            # Channel counts already match: identity skip.
            return inputs
        else:
            # Otherwise a 1x1 conv maps the input to numOut channels.
            conv = conv2d(inputs, numOut, kernel_size=1, strides=1, name='conv')
            return conv
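A quick check of the skip behaviour (assuming `training` and the functions above are already defined): when the channel counts match, the skip is a pure identity; otherwise a 1x1 conv is inserted.

x = tf.placeholder(tf.float32, [None, 64, 64, 128])
y = residual(x, numOut=128)   # skip branch is the identity
z = residual(x, numOut=256)   # skip branch becomes a 1x1 conv to 256 channels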



Then the hourglass operation itself:

def hourglass(inputs, n, numOut, name = 'hourglass'):
    with tf.name_scope(name):
        # Upper branch: keeps the current resolution.
        up_1 = residual(inputs, numOut, name='up_1')
        # Lower branch: halve the resolution, process, then upsample back.
        low_ = tf.contrib.layers.max_pool2d(inputs, [2,2], [2,2], padding='VALID')
        low_1 = residual(low_, numOut, name='low_1')

        if n > 0:
            # Recurse: each level nests another hourglass one scale lower.
            low_2 = hourglass(low_1, n-1, numOut, name='low_2')
        else:
            low_2 = residual(low_1, numOut, name='low_2')

        low_3 = residual(low_2, numOut, name='low_3')
        # Nearest-neighbor upsampling back to the upper branch's resolution.
        up_2 = tf.image.resize_nearest_neighbor(low_3, tf.shape(up_1)[1:3], name='upsampling')
        return tf.add_n([up_2, up_1], name='out_hg')
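As a quick shape check (again assuming `training` and the functions above are defined), a 64x64 input with n = 3 nests four pooling levels, bottoming out at 4x4 before upsampling back:

x = tf.placeholder(tf.float32, [None, 64, 64, 256])
hg = hourglass(x, n=3, numOut=256)  # 64 -> 32 -> 16 -> 8 -> 4, then back up to 64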

Finally, the full network:

def net(inputs):
    # Assumed hyperparameters, defined elsewhere: nFeat (feature channels),
    # nStack (number of stacked hourglasses), nLow (hourglass recursion
    # depth), outDim (number of joint heatmaps), dropout_rate, and the
    # boolean `training` flag used above.
    # Input Dim : nbImages x 256 x 256 x 3
    pad1 = tf.pad(inputs, [[0,0],[2,2],[2,2],[0,0]], name='pad_1')
    # Dim pad1 : nbImages x 260 x 260 x 3
    conv1 = conv_bn_relu(pad1, filters=64, kernel_size=6, strides=2, name='conv_256_to_128')
    # Dim conv1 : nbImages x 128 x 128 x 64
    r1 = residual(conv1, numOut=128, name='r1')
    # Dim r1 : nbImages x 128 x 128 x 128
    pool1 = tf.contrib.layers.max_pool2d(r1, [2,2], [2,2], padding='VALID')
    # Dim pool1 : nbImages x 64 x 64 x 128
    r2 = residual(pool1, numOut=int(nFeat/2), name='r2')
    r3 = residual(r2, numOut=nFeat, name='r3')

    # First stack: hourglass, dropout, then 1x1 conv heads. `out` is this
    # stack's heatmap prediction; it is stored in the 'heatmaps' collection
    # so every stack can receive intermediate supervision.
    hg = hourglass(r3, nLow, nFeat, 'hourglass')
    drop = tf.layers.dropout(hg, rate=dropout_rate, training=training, name='dropout')
    ll = conv_bn_relu(drop, nFeat, 1, 1, 'VALID', name='conv')
    ll_ = conv2d(ll, nFeat, 1, 1, 'VALID', 'll')
    out = conv2d(ll, outDim, 1, 1, 'VALID', 'out')
    tf.add_to_collection('heatmaps', out)
    # Remap the heatmaps back to nFeat channels and merge them with the
    # features, so the next stack sees both prediction and evidence.
    out_ = conv2d(out, nFeat, 1, 1, 'VALID', 'out_')
    sum_ = tf.add_n([out_, ll_, r3], name='merge')

    # Intermediate stacks (stages 1 .. nStack-2) repeat the same pattern.
    for _ in range(1, nStack - 1):
        hg = hourglass(sum_, nLow, nFeat, 'hourglass')
        drop = tf.layers.dropout(hg, rate=dropout_rate, training=training, name='dropout')
        ll = conv_bn_relu(drop, nFeat, 1, 1, 'VALID', name='conv')
        ll_ = conv2d(ll, nFeat, 1, 1, 'VALID', 'll')
        out = conv2d(ll, outDim, 1, 1, 'VALID', 'out')
        tf.add_to_collection('heatmaps', out)
        out_ = conv2d(out, nFeat, 1, 1, 'VALID', 'out_')
        sum_ = tf.add_n([out_, sum_, ll_], name='merge')

    # The last stack only needs to produce the final heatmaps.
    with tf.name_scope('stage_' + str(nStack - 1)):
        hg = hourglass(sum_, nLow, nFeat, 'hourglass')
        drop = tf.layers.dropout(hg, rate=dropout_rate, training=training, name='dropout')
        ll = conv_bn_relu(drop, nFeat, 1, 1, 'VALID', 'conv')
        out = conv2d(ll, outDim, 1, 1, 'VALID', 'out')

    return out
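The pieces can be wired together with a sketch like the following; the hyperparameter values are illustrative assumptions, not the only valid choice:

training = tf.placeholder_with_default(False, [], name='training')  # train/test switch for BN and dropout
nFeat = 256        # feature channels inside each hourglass
nStack = 4         # number of stacked hourglass modules
nLow = 3           # recursion depth: 64x64 pools down to 4x4
outDim = 16        # one heatmap per MPII joint
dropout_rate = 0.2

img = tf.placeholder(tf.float32, [None, 256, 256, 3], name='input')
output = net(img)                        # last stack's heatmaps: 64 x 64 x outDim
stacks = tf.get_collection('heatmaps')   # earlier stacks' heatmaps

In the paper, an MSE loss against ground-truth heatmaps is applied to every stack's output, not only the last one; this intermediate supervision is the main reason stacking helps.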

References:

  1. http://blog.csdn.net/layumi1993/article/details/52459385
  2. http://blog.csdn.net/zziahgf/article/details/72732220
  3. http://blog.csdn.net/shenxiaolu1984/article/details/51428392
  4. https://github.com/wbenbihi/hourglasstensorlfow
