Text classification with RNN and LSTM in one framework

from keras.datasets import imdb
from keras import preprocessing
max_features = 10000             # number of words to keep as features
maxlen = 500                     # cut reviews off after 500 words
(x_train, y_train),(x_test,y_test) = imdb.load_data(num_words = max_features)
print(len(x_train),'train sequences')    # 25000
print(len(x_test),'test sequences')      # 25000
# Turn the lists of integers into a 2D integer tensor of shape (samples, maxlen)
x_train = preprocessing.sequence.pad_sequences(x_train,maxlen=maxlen)
x_test = preprocessing.sequence.pad_sequences(x_test,maxlen=maxlen)
print('train shape:' ,x_train.shape)      # 25000,500
print('test shape:' ,x_test.shape)        # 25000,500
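As a quick illustration (my own toy example, not part of the original script), pad_sequences left-pads shorter sequences with zeros and truncates longer ones from the front by default, so every row ends up with exactly maxlen entries:

# Toy demonstration of pad_sequences (assumed toy data, not the IMDB reviews):
# short sequences are padded with 0 on the left, long ones are truncated from the beginning.
toy = [[1, 2, 3], [4, 5, 6, 7, 8, 9]]
print(preprocessing.sequence.pad_sequences(toy, maxlen=4))
# [[0 1 2 3]
#  [6 7 8 9]]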

#############################################################################1.SimpleRNN
from keras.models import Sequential
from keras.layers import Flatten,Dense,Embedding,SimpleRNN
model = Sequential()
'''
Notes on Embedding:
input_dim: integer >= 0, the vocabulary size, i.e. the largest integer index in the input + 1
output_dim: integer > 0, the dimension of the dense embedding
input_length: the length of the input sequences, when it is constant.
This argument is required if a Flatten layer followed by a Dense layer comes after this layer,
otherwise the output shape of the Dense layer cannot be inferred.
Input shape:  a 2D tensor of shape (samples, sequence_length), where sequence_length is the number of words per sample
Output shape: a 3D tensor of shape (samples, sequence_length, output_dim), where samples is the number of samples (25000 here)
(A standalone shape-check sketch follows after this section.)
'''
model.add(Embedding(max_features,32))
model.add(SimpleRNN(32))                                                               # returns only the output of the last timestep, shape (batch, 32)
model.add(Dense(1,activation='sigmoid'))
model.compile(optimizer='rmsprop',loss='binary_crossentropy',metrics=['acc'])
print(model.summary())
history = model.fit(x_train,y_train,epochs=10,batch_size=128,validation_split=0.2)
import matplotlib.pyplot as plt
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1,len(acc)+1)
plt.plot(epochs,acc,'bo',label="Training acc")
plt.plot(epochs,val_acc,'b',label="Validation acc")
plt.title("Training and Validation accuracy")
plt.legend()
plt.figure()
plt.plot(epochs,loss,'bo',label="Training loss")
plt.plot(epochs,val_loss,'b',label="Validation loss")
plt.title("Training and Validation loss")
plt.legend()
plt.show()
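The Embedding notes above describe a 2D -> 3D -> 2D flow of tensor shapes. The following standalone sketch (my own addition, using a toy vocabulary of 10 words and toy sequences rather than the IMDB model above) makes that concrete:

# Minimal shape-check sketch with toy numbers:
# Embedding maps a 2D batch of word indices, (samples, sequence_length),
# to a 3D tensor (samples, sequence_length, output_dim); SimpleRNN(units)
# then keeps only the last timestep, giving a 2D tensor (samples, units).
import numpy as np
toy_batch = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])           # shape (2, 4)
emb_only = Sequential([Embedding(10, 8)])
print(emb_only.predict(toy_batch).shape)                     # (2, 4, 8)
emb_rnn = Sequential([Embedding(10, 8), SimpleRNN(6)])
print(emb_rnn.predict(toy_batch).shape)                      # (2, 6)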
#############################################################################2.LSTM
from keras.layers import LSTM
model = Sequential()
model.add(Embedding(max_features,32))
model.add(LSTM(32))
model.add(Dense(1,activation='sigmoid'))
model.compile(optimizer='rmsprop',loss='binary_crossentropy',metrics=['acc'])
print(model.summary())
history = model.fit(x_train,y_train,epochs=10,batch_size=128,validation_split=0.2)
import matplotlib.pyplot as plt
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1,len(acc)+1)
plt.plot(epochs,acc,'bo',label="Training acc")
plt.plot(epochs,val_acc,'b',label="Validation acc")
plt.title("Training and Validation accuracy")
plt.legend()
plt.figure()
plt.plot(epochs,loss,'bo',label="Training loss")
plt.plot(epochs,val_loss,'b',label="Validation loss")
plt.title("Training and Validation loss")
plt.legend()
plt.show()
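The test split prepared at the top of the script is never used in the original post. As a hedged sketch (my addition), a final evaluation of the most recently trained model (the LSTM above) on that held-out data could look like this:

# Evaluate the last trained model on the padded test data from the top of the script.
test_loss, test_acc = model.evaluate(x_test, y_test, batch_size=128)
print('test loss:', test_loss, 'test acc:', test_acc)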





Reprinted from blog.csdn.net/kylin_learn/article/details/85029021