keras用xception网络实现猫狗识别

前言

最近想复现一下xception实现分类的任务，但是网络上只找得到xception的结构实现[1]，或是像VGG[2]、Inception-v3[3]之类的迁移学习。没有代码能直接拿来用，那就学习一下，自己写写。最终目标当然不只是猫狗分类，不过可以用它来验证一下正确性。

如果完全不懂keras，可以去看一下这个教程视频，它的前提是懂得tensorflow或者theano。很基础，能有个大概的了解。

数据集

用的是最基础的猫狗识别数据集。因为网速太差、上传太慢，这里就不附链接了，这个数据集很容易就可以下载到。

读取数据时，flow_from_directory 会按 target_size 把图片统一缩放（并不是 xception 内部处理的），所以数据集图片大小不作要求。但是目录结构得是下面这种方式[4]，即 train/ 和 test/ 目录下每个类别各有一个子文件夹（例如 cats/ 和 dogs/）：

其中我的数据集中训练集猫狗各1000张，测试集各500张。

代码

本来想模仿[3]来写，但是找不到xception预训练好的pb文件，只能找到对应于keras的h5文件（top&notop），所以网络模型上参考了[1]的内容。虽然keras其实内部有封装好的xception，但是因为会有某些路径错误[5]，所以就直接使用了[1]。

from __future__ import print_function
from __future__ import absolute_import

import warnings
import numpy as np
import keras

from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator

from keras.models import Model, Sequential
from keras import layers
from keras.layers import Dense, Input, BatchNormalization, Activation, Flatten
from keras.layers import Conv2D, SeparableConv2D, MaxPooling2D, GlobalAveragePooling2D, GlobalMaxPooling2D
from keras.engine.topology import get_source_inputs
from keras.utils.data_utils import get_file
from keras import backend as K
from keras_applications.imagenet_utils import decode_predictions
from keras_applications.imagenet_utils import _obtain_input_shape
# from keras_applications.xception import Xception

# Local paths to the Xception weight files (.h5) that Xception() loads when
# weights='imagenet'. They are absolute paths on the author's machine.
TF_WEIGHTS_PATH = 'E:/files/my_file/master/deep/xception/weights/xception_weights_tf_dim_ordering_tf_kernels.h5'
TF_WEIGHTS_PATH_NO_TOP = 'E:/files/my_file/master/deep/xception/weights/xception_weights_tf_dim_ordering_tf_kernels_notop.h5'
# Use the NO_TOP weights when further layers are stacked on top of Xception;
# "no top" means the final softmax classifier is left out.
# Dataset root: holds the "train" and "test" image directories and is also
# where the bottleneck-feature .npy files and classifier weights are written.
DATA_PATH = "E:/dl_data/cats_and_dogs_filtered/cats_dogs/"

def Xception(include_top=True, weights='imagenet',
             input_tensor=None, input_shape=None,
             pooling=None,
             classes=1000):
    """Build the Xception network and optionally load weights from local files.

    Args:
        include_top: If true, finish the network with global average pooling
            and a `classes`-way softmax `Dense` layer named 'predictions'.
        weights: Either 'imagenet' or None. With 'imagenet' the weights are
            loaded from TF_WEIGHTS_PATH (with top) or TF_WEIGHTS_PATH_NO_TOP
            (without top); with None no weights are loaded.
        input_tensor: Optional tensor to use as the model input. A tensor that
            is not a Keras tensor is wrapped in an `Input` layer.
        input_shape: Optional input shape, validated by `_obtain_input_shape`
            with default_size=299 and min_size=71.
        pooling: Only consulted when `include_top` is false: 'avg' appends
            global average pooling, 'max' appends global max pooling, any
            other value leaves the last activation as the model output.
        classes: Number of units of the top softmax layer. Must be 1000 when
            `weights='imagenet'` and `include_top` is true.

    Returns:
        A Keras `Model` named 'xception'.

    Raises:
        ValueError: If `weights` is neither 'imagenet' nor None, or if
            ImageNet weights with top are requested with `classes != 1000`.
        RuntimeError: If the Keras backend is not TensorFlow.
    """
    if weights not in {'imagenet', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `imagenet` '
                         '(pre-training on ImageNet).')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError('If using `weights` as imagenet with `include_top`'
                         ' as true, `classes` should be 1000')

    if K.backend() != 'tensorflow':
        raise RuntimeError('The Xception model is only available with '
                           'the TensorFlow backend.')
    if K.image_data_format() != 'channels_last':
        warnings.warn('The Xception model is only available for the '
                      'input data format "channels_last" '
                      '(width, height, channels). '
                      'However your settings specify the default '
                      'data format "channels_first" (channels, width, height). '
                      'You should set `image_data_format="channels_last"` in your Keras '
                      'config located at ~/.keras/keras.json. '
                      'The model being returned right now will expect inputs '
                      'to follow the "channels_last" data format.')
        # Switch to channels_last while the model is built; the previous
        # setting is put back just before returning.
        K.set_image_data_format('channels_last')
        old_data_format = 'channels_first'
    else:
        old_data_format = None

    input_shape = _obtain_input_shape(input_shape,
                                      default_size=299,
                                      min_size=71,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    # Block 1: two plain 3x3 convolutions, the first with stride 2.
    x = Conv2D(32, (3, 3), strides=(2, 2), use_bias=False, name='block1_conv1')(img_input)
    x = BatchNormalization(name='block1_conv1_bn')(x)
    x = Activation('relu', name='block1_conv1_act')(x)
    x = Conv2D(64, (3, 3), use_bias=False, name='block1_conv2')(x)
    x = BatchNormalization(name='block1_conv2_bn')(x)
    x = Activation('relu', name='block1_conv2_act')(x)

    # Block 2: two separable convolutions plus stride-2 max pooling, added to
    # a stride-2 1x1 convolution shortcut so both branches have equal shapes.
    residual = Conv2D(128, (1, 1), strides=(2, 2),
                      padding='same', use_bias=False)(x)
    residual = BatchNormalization()(residual)

    x = SeparableConv2D(128, (3, 3), padding='same', use_bias=False, name='block2_sepconv1')(x)
    x = BatchNormalization(name='block2_sepconv1_bn')(x)
    x = Activation('relu', name='block2_sepconv2_act')(x)
    x = SeparableConv2D(128, (3, 3), padding='same', use_bias=False, name='block2_sepconv2')(x)
    x = BatchNormalization(name='block2_sepconv2_bn')(x)

    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block2_pool')(x)
    x = layers.add([x, residual])

    # Block 3: same pattern as block 2 with 256 filters; each separable
    # convolution is now preceded by a ReLU.
    residual = Conv2D(256, (1, 1), strides=(2, 2),
                      padding='same', use_bias=False)(x)
    residual = BatchNormalization()(residual)

    x = Activation('relu', name='block3_sepconv1_act')(x)
    x = SeparableConv2D(256, (3, 3), padding='same', use_bias=False, name='block3_sepconv1')(x)
    x = BatchNormalization(name='block3_sepconv1_bn')(x)
    x = Activation('relu', name='block3_sepconv2_act')(x)
    x = SeparableConv2D(256, (3, 3), padding='same', use_bias=False, name='block3_sepconv2')(x)
    x = BatchNormalization(name='block3_sepconv2_bn')(x)

    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block3_pool')(x)
    x = layers.add([x, residual])

    # Block 4: same pattern with 728 filters.
    residual = Conv2D(728, (1, 1), strides=(2, 2),
                      padding='same', use_bias=False)(x)
    residual = BatchNormalization()(residual)

    x = Activation('relu', name='block4_sepconv1_act')(x)
    x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name='block4_sepconv1')(x)
    x = BatchNormalization(name='block4_sepconv1_bn')(x)
    x = Activation('relu', name='block4_sepconv2_act')(x)
    x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name='block4_sepconv2')(x)
    x = BatchNormalization(name='block4_sepconv2_bn')(x)

    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block4_pool')(x)
    x = layers.add([x, residual])

    # Blocks 5-12: eight identical blocks of three 728-filter separable
    # convolutions each, with the block input added back unchanged.
    for i in range(8):
        residual = x
        prefix = 'block' + str(i + 5)

        x = Activation('relu', name=prefix + '_sepconv1_act')(x)
        x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv1')(x)
        x = BatchNormalization(name=prefix + '_sepconv1_bn')(x)
        x = Activation('relu', name=prefix + '_sepconv2_act')(x)
        x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv2')(x)
        x = BatchNormalization(name=prefix + '_sepconv2_bn')(x)
        x = Activation('relu', name=prefix + '_sepconv3_act')(x)
        x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv3')(x)
        x = BatchNormalization(name=prefix + '_sepconv3_bn')(x)

        x = layers.add([x, residual])

    # Block 13: separable convolutions with 728 then 1024 filters, stride-2
    # max pooling, and a stride-2 1x1 convolution shortcut.
    residual = Conv2D(1024, (1, 1), strides=(2, 2),
                      padding='same', use_bias=False)(x)
    residual = BatchNormalization()(residual)

    x = Activation('relu', name='block13_sepconv1_act')(x)
    x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name='block13_sepconv1')(x)
    x = BatchNormalization(name='block13_sepconv1_bn')(x)
    x = Activation('relu', name='block13_sepconv2_act')(x)
    x = SeparableConv2D(1024, (3, 3), padding='same', use_bias=False, name='block13_sepconv2')(x)
    x = BatchNormalization(name='block13_sepconv2_bn')(x)

    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block13_pool')(x)
    x = layers.add([x, residual])

    # Block 14: two separable convolutions (1536 and 2048 filters) without a
    # shortcut; here the ReLU follows the batch normalization.
    x = SeparableConv2D(1536, (3, 3), padding='same', use_bias=False, name='block14_sepconv1')(x)
    x = BatchNormalization(name='block14_sepconv1_bn')(x)
    x = Activation('relu', name='block14_sepconv1_act')(x)

    x = SeparableConv2D(2048, (3, 3), padding='same', use_bias=False, name='block14_sepconv2')(x)
    x = BatchNormalization(name='block14_sepconv2_bn')(x)
    x = Activation('relu', name='block14_sepconv2_act')(x)

    if include_top:
        x = GlobalAveragePooling2D(name='avg_pool')(x)
        x = Dense(classes, activation='softmax', name='predictions')(x)
    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D()(x)

    # When the caller supplied a tensor, use its source inputs as the model
    # inputs instead of the Input layer created above.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    model = Model(inputs, x, name='xception')

    # Weights come from the local .h5 files named by the module constants.
    if weights == 'imagenet':
        if include_top:
            weights_path = TF_WEIGHTS_PATH
        else:
            weights_path = TF_WEIGHTS_PATH_NO_TOP
        model.load_weights(weights_path)

    # Restore the image data format that was active before the call.
    if old_data_format:
        K.set_image_data_format(old_data_format)
    return model

def preprocess_input(x):
    """Scale pixel values from the range [0, 255] to the range [-1, 1].

    Floating-point arrays are scaled in place and the same object is
    returned. Integer or boolean arrays (for example uint8 images) cannot be
    scaled in place, so they are first copied to float32 and the caller's
    array is left untouched.

    Args:
        x: Pixel data, typically a NumPy array of image values in [0, 255].

    Returns:
        The scaled data, computed as `(x / 255 - 0.5) * 2`.
    """
    if isinstance(x, np.ndarray) and x.dtype.kind in 'iub':
        # In-place true division on an integer array raises a casting error,
        # so work on a float copy instead.
        x = x.astype(np.float32)
    x /= 255.
    x -= 0.5
    x *= 2.
    return x

# Extract the bottleneck features of the images.
def get_bottleneck_features():
    """Run the train and test images through Xception and cache the features.

    Images are read from DATA_PATH + "train" and DATA_PATH + "test", resized
    to 150x150 and fed, unshuffled, through the headless Xception model. The
    outputs are written to "bottleneck_features_train.npy" and
    "bottleneck_features_test.npy" under DATA_PATH.

    Because shuffling is off, the saved features keep the order in which the
    generator lists the files, which is what allows the training script to
    rebuild the labels by position.
    """
    batch_size = 32
    target_size = (150, 150)

    # The ImageNet Xception weights expect inputs scaled to [-1, 1], which is
    # what preprocess_input produces; a plain 1/255 rescale would feed [0, 1].
    datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

    def make_generator(subset):
        # class_mode=None yields images only; shuffle=False keeps file order.
        return datagen.flow_from_directory(DATA_PATH + subset,
                                           target_size=target_size,
                                           batch_size=batch_size,
                                           class_mode=None,
                                           shuffle=False)

    generator_train = make_generator("train")
    generator_test = make_generator("test")

    # The pre-trained weights are loaded inside Xception() itself.
    model = Xception(include_top=False, weights='imagenet')

    # len(generator) is the number of batches needed to cover every image
    # once, including a final partial batch. Hard-coded quotients such as
    # 2000/32 are not integers (and floor under Python 2, silently dropping
    # samples), and they break as soon as the dataset size changes.
    bottleneck_features_train = model.predict_generator(
        generator_train, steps=len(generator_train))
    np.save(DATA_PATH + "bottleneck_features_train.npy", bottleneck_features_train)

    bottleneck_features_test = model.predict_generator(
        generator_test, steps=len(generator_test))
    np.save(DATA_PATH + "bottleneck_features_test.npy", bottleneck_features_test)
    print("over!")

if __name__ == '__main__':
    # One-off preparation step: uncomment to (re)create the cached .npy
    # bottleneck features before training.
    # get_bottleneck_features()

    # Load the cached bottleneck features. They were stored unshuffled, so the
    # labels are rebuilt by position: 1000 cats then 1000 dogs for training,
    # 500 cats then 500 dogs for testing.
    train_data = np.load(DATA_PATH + "bottleneck_features_train.npy")
    print("train_data: ", train_data.shape)
    train_labels = np.repeat([0, 1], 1000)
    print("train_labels: ", train_labels.shape)
    test_data = np.load(DATA_PATH + "bottleneck_features_test.npy")
    test_labels = np.repeat([0, 1], 500)

    # Convert the integer labels to one-hot vectors.
    train_labels = keras.utils.to_categorical(train_labels, 2)
    test_labels = keras.utils.to_categorical(test_labels, 2)

    # Classifier head: flatten the 5x5x2048 feature map and apply a two-way
    # softmax layer.
    model = Sequential([
        Flatten(input_shape=(5, 5, 2048)),
        Dense(2, activation='softmax'),
    ])

    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    print("Training --------------------")
    model.fit(train_data, train_labels,
              batch_size=100, epochs=4, shuffle=True, verbose=2)
    model.save_weights(DATA_PATH + "cats_dogs_bottleneck.h5")

    # Evaluate on the held-out test features.
    print("\nTesting --------------------")
    test_loss, test_accuracy = model.evaluate(test_data, test_labels)

    print("\ntest loss=", test_loss)
    print("\ntest accuracy=", test_accuracy)

上述代码中的其他部分参考[2]，原文代码直接copy过来有几个地方报错，因为[2]使用的是封装好的VGG而不是我这里的自定义的Xception。还有np.save()的报错问题，均已在上述代码中修改。

（其他细节方面改天再更）

运行结果

训练：

测试：

补充

如果要在xception后面再加其它层，比如[6]，只需把上述代码中网络结构部分改成下面即可：

# Network structure
# NOTE(review): this fragment is meant to replace the network-structure part
# of the script above. LSTM, n_hidden_unis, n_steps and n_inputs are neither
# imported nor defined in the code shown here, so they must be supplied first.
    model = Sequential()
    # print("before lstm: ", model.get_layer(index=0).output_shape)
    # LSTM layer (single layer)
    model.add(LSTM(units=n_hidden_unis, input_shape=(n_steps, n_inputs)))
    print("after lstm: ", model.get_layer(index=0).output_shape) # (None, 512)
    # Fully connected layer
    # model.add(Flatten(input_shape=(None, n_hidden_unis)))
    model.add(Dense(2, activation='softmax'))

运行结果：