序列
序列模型是layer-by-layer的,它是最简单的定义模型的方法,但是有几个不足:
• 不能够共享某一层
• 不能有多个分支
• 不能有多个输入或多个输出
这种结构的经典网络例如:LeNet-5、AlexNet、VGGNet。
#导入必备包
import tensorflow as tf
#开始定义模型
def shallownet_sequential(width, height, depth, classes):
    """Build ShallowNet (CONV -> RELU -> FC -> softmax) with the Sequential API.

    Args:
        width: input image width in pixels.
        height: input image height in pixels.
        depth: number of input channels (channels-last layout).
        classes: number of output classes for the softmax head.

    Returns:
        An uncompiled tf.keras.Sequential model.
    """
    # Channels-last: Keras expects (height, width, depth) for the input shape.
    input_shape = (height, width, depth)

    net = tf.keras.Sequential()
    net.add(tf.keras.layers.Conv2D(32, (3, 3), padding='same', input_shape=input_shape))
    net.add(tf.keras.layers.Activation("relu"))

    # Classifier head: flatten the feature maps, project to `classes` logits, softmax.
    net.add(tf.keras.layers.Flatten())
    net.add(tf.keras.layers.Dense(classes))
    net.add(tf.keras.layers.Activation("softmax"))
    return net
# Smoke-test the Sequential model on a random batch of four 28x28 RGB images.
model = shallownet_sequential(28, 28, 3, 10)
model.summary()
inputs = tf.random.normal((4, 28, 28, 3))
outputs = model(inputs)
# BUG FIX: the original read `print(outputs. Shape)` — tensors have no `Shape`
# attribute (and the space is a syntax oddity); the correct accessor is `.shape`.
print(outputs.shape)
函数式
函数式API有更强的功能
• 定义更复杂的模型
• 支持多输入多输出
• 可以定义模型分支,比如inception block , resnet block
• 方便layer共享
#导入必备包
import tensorflow as tf
def conv_module(x, k, kx, ky, stride, chandim, padding="same"):
    """Apply a CONV -> BatchNorm -> ReLU stack to `x` and return the result.

    Args:
        x: input tensor.
        k: number of convolution filters.
        kx, ky: kernel height/width.
        stride: strides tuple for the convolution.
        chandim: channel axis for batch normalization.
        padding: convolution padding mode ("same" by default).
    """
    conv = tf.keras.layers.Conv2D(k, (kx, ky), strides=stride, padding=padding)(x)
    normed = tf.keras.layers.BatchNormalization(axis=chandim)(conv)
    return tf.keras.layers.Activation('relu')(normed)
def inception_module(x, num1x1, num3x3, chandim):
    """Mini-inception block: parallel 1x1 and 3x3 conv branches, channel-concatenated.

    Args:
        x: input tensor.
        num1x1: filter count for the 1x1 branch.
        num3x3: filter count for the 3x3 branch.
        chandim: axis along which the two branches are concatenated.
    """
    branch_1x1 = conv_module(x, num1x1, 1, 1, (1, 1), chandim)
    branch_3x3 = conv_module(x, num3x3, 3, 3, (1, 1), chandim)
    return tf.keras.layers.concatenate([branch_1x1, branch_3x3], axis=chandim)
def downsample_module(x, k, chandim):
    """Downsample `x` spatially via two parallel branches, then concatenate.

    One branch is a strided 3x3 valid-padded convolution (k filters), the
    other a 3x3 stride-2 max-pool; both halve the spatial resolution so the
    channel-wise concatenation is shape-compatible.
    """
    conv_branch = conv_module(x, k, 3, 3, (2, 2), chandim, padding='valid')
    pool_branch = tf.keras.layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
    return tf.keras.layers.concatenate([conv_branch, pool_branch], axis=chandim)
# 然后定义整个MiniGoogLeNet
def minigooglenet_functional(width, height, depth, classes):
    """Build MiniGoogLeNet with the Keras functional API.

    Args:
        width: input image width in pixels.
        height: input image height in pixels.
        depth: number of input channels (channels-last layout).
        classes: number of output classes for the softmax head.

    Returns:
        An uncompiled tf.keras.Model named 'MiniGoogLeNet'.
    """
    chandim = -1  # channels-last: the channel axis is the last dimension
    inputs = tf.keras.layers.Input(shape=(height, width, depth))

    # Stem: a single conv block.
    x = conv_module(inputs, 96, 3, 3, (1, 1), chandim)

    # Stage 1: two inception modules followed by a downsample.
    x = inception_module(x, 32, 32, chandim)
    x = inception_module(x, 32, 48, chandim)
    x = downsample_module(x, 80, chandim)

    # Stage 2: four inception modules followed by a downsample.
    x = inception_module(x, 112, 48, chandim)
    x = inception_module(x, 96, 64, chandim)
    x = inception_module(x, 80, 80, chandim)
    x = inception_module(x, 48, 96, chandim)
    x = downsample_module(x, 96, chandim)

    # Stage 3: two inception modules, average pooling, dropout.
    x = inception_module(x, 176, 160, chandim)
    x = inception_module(x, 176, 160, chandim)
    x = tf.keras.layers.AveragePooling2D((7, 7))(x)
    x = tf.keras.layers.Dropout(0.5)(x)

    # Softmax classifier head.
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(classes)(x)
    x = tf.keras.layers.Activation('softmax')(x)

    return tf.keras.Model(inputs, x, name='MiniGoogLeNet')
# Smoke-test MiniGoogLeNet on a random batch of four 288x288 RGB images.
model = minigooglenet_functional(288, 288, 3, 10)
model.summary()
inputs = tf.random.normal((4, 288, 288, 3))
outputs = model(inputs)
print(outputs.shape)
子类方法
import tensorflow as tf
# Toy regression data: two samples with three features each, with float targets.
x=tf.constant([[1,2,3],[4,5,6]])
y=tf.constant([[10.0],[20.0]])
class Linear(tf.keras.Model):
    """A single-unit linear model (y = x @ w + b) for least-squares regression.

    BUG FIX: the original passed `activation=tf.nn.relu` while initializing
    both the kernel and bias to zero. With all-zero weights the pre-activation
    is 0 for every sample, and TensorFlow's relu gradient at 0 is 0, so every
    gradient in the training loop below was zero and SGD could never move the
    weights. A linear regression needs no activation — removing it (which also
    matches the class name) lets gradients flow and the model actually fit.
    """

    def __init__(self):
        super().__init__()
        # One output unit; zero-initialized kernel and bias (w=0, b=0).
        self.dense = tf.keras.layers.Dense(
            units=1,
            kernel_initializer=tf.zeros_initializer,
            bias_initializer=tf.zeros_initializer,
        )

    def call(self, input):
        """Return the affine transform of `input`, shape (batch, 1)."""
        return self.dense(input)
# Train the subclassed model with plain SGD on mean squared error.
model = Linear()
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
for epoch in range(100):
    # Record the forward pass so the tape can differentiate the loss.
    with tf.GradientTape() as tape:
        predictions = model(x)
        loss = tf.reduce_mean(tf.square(predictions - y))
    grads = tape.gradient(loss, model.variables)
    optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))
print(model.variables)