TensorFlow预测燃油效率

环境:Jupyter Notebook、TensorFlow 2.1.0、Python 3.7.5

import pathlib
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow import keras as keras
from tensorflow.keras import layers as layers
# Fetch the Auto MPG data file with keras.utils.get_file and print the value it
# returns (the local path of the file).
dataset_path = keras.utils.get_file("auto-mpg.data", "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data")
print(dataset_path)
/Users/xxxx/.keras/datasets/auto-mpg.data
column_names = ['MPG','Cylinders','Displacement','Horsepower','Weight',
                'Acceleration', 'Model Year', 'Origin']
# Column meanings: miles per gallon, cylinders, displacement, horsepower,
# weight, acceleration, model year, origin.
raw_dataset=pd.read_csv(dataset_path,
                        names=column_names, # column headers
                        na_values='?', # parse '?' entries as missing values (NaN)
                        comment='\t', # ignore the rest of a line after a tab (presumably the trailing car-name field; confirm against the data file)
                        sep=' ', # field delimiter
                        skipinitialspace=True) # skip spaces that follow the delimiter
# Work on a copy so the raw frame stays untouched.
dataset = raw_dataset.copy()
dataset.tail() # show the last n rows (n defaults to 5)
MPG Cylinders Displacement Horsepower Weight Acceleration Model Year Origin
393 27.0 4 140.0 86.0 2790.0 15.6 82 1
394 44.0 4 97.0 52.0 2130.0 24.6 82 2
395 32.0 4 135.0 84.0 2295.0 11.6 82 1
396 28.0 4 120.0 79.0 2625.0 18.6 82 1
397 31.0 4 119.0 82.0 2720.0 19.4 82 1
dataset.isna().sum() # count the missing values in each column
MPG             0
Cylinders       0
Displacement    0
Horsepower      6
Weight          0
Acceleration    0
Model Year      0
Origin          0
dtype: int64
# Drop every row that still contains a missing value.
dataset = dataset.dropna()
# Take the numeric "Origin" column out of the frame and replace it with one
# 0.0/1.0 indicator column per origin code (1 -> USA, 2 -> Europe, 3 -> Japan).
origin = dataset.pop('Origin')
for region, code in (('USA', 1), ('Europe', 2), ('Japan', 3)):
    dataset[region] = (origin == code) * 1.0
dataset.tail()
MPG Cylinders Displacement Horsepower Weight Acceleration Model Year USA Europe Japan
393 27.0 4 140.0 86.0 2790.0 15.6 82 1.0 0.0 0.0
394 44.0 4 97.0 52.0 2130.0 24.6 82 0.0 1.0 0.0
395 32.0 4 135.0 84.0 2295.0 11.6 82 1.0 0.0 0.0
396 28.0 4 120.0 79.0 2625.0 18.6 82 1.0 0.0 0.0
397 31.0 4 119.0 82.0 2720.0 19.4 82 1.0 0.0 0.0
train_dataset = dataset.sample(frac=0.8,random_state=0) # randomly sample 80% of the rows, with random seed 0
test_dataset = dataset.drop(train_dataset.index) # the rows not sampled for training form the test set
# Pairwise plots of four training columns, with KDE plots on the diagonal.
sns.pairplot(train_dataset[["MPG", "Cylinders", "Displacement", "Weight"]],diag_kind='kde')
<seaborn.axisgrid.PairGrid at 0x10cd98110>

# Summary statistics of the training set.
train_stats = train_dataset.describe()
train_stats.pop("MPG") # remove the MPG column from the statistics; in the interactive Jupyter session the popped Series is displayed as the cell output
count    314.000000
mean      23.310510
std        7.728652
min       10.000000
25%       17.000000
50%       22.000000
75%       28.950000
max       46.600000
Name: MPG, dtype: float64
# Transpose so that each feature is a row and each statistic (mean, std, ...)
# is a column; norm() looks the statistics up by those column names.
train_stats = train_stats.transpose()
train_stats
count mean std min 25% 50% 75% max
Cylinders 314.0 5.477707 1.699788 3.0 4.00 4.0 8.00 8.0
Displacement 314.0 195.318471 104.331589 68.0 105.50 151.0 265.75 455.0
Horsepower 314.0 104.869427 38.096214 46.0 76.25 94.5 128.00 225.0
Weight 314.0 2990.251592 843.898596 1649.0 2256.50 2822.5 3608.00 5140.0
Acceleration 314.0 15.559236 2.789230 8.0 13.80 15.5 17.20 24.8
Model Year 314.0 75.898089 3.675642 70.0 73.00 76.0 79.00 82.0
USA 314.0 0.624204 0.485101 0.0 0.00 1.0 1.00 1.0
Europe 314.0 0.178344 0.383413 0.0 0.00 0.0 0.00 1.0
Japan 314.0 0.197452 0.398712 0.0 0.00 0.0 0.00 1.0
# Separate the MPG column from the features of both splits.
train_labels = train_dataset.pop('MPG')
test_labels = test_dataset.pop('MPG')
# This label is the value the model is trained to predict.
def norm(x):
    """Standardize ``x`` using the training-set statistics.

    Subtracts ``train_stats['mean']`` from ``x`` and divides the result by
    ``train_stats['std']``; both are read from the module-level
    ``train_stats`` table.
    """
    centered = x - train_stats['mean']
    return centered / train_stats['std']
# Standardize both splits with the training-set statistics.
normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)

# Two 64-unit ReLU hidden layers followed by a single linear output unit; the
# input width is the number of feature columns.
model = keras.models.Sequential([
    layers.Dense(64, activation='relu',
                 input_shape=[len(train_dataset.keys())]),
    layers.Dense(64, activation='relu'),
    layers.Dense(1),
])

# Train on mean squared error with RMSprop (learning rate 0.001) and report
# both MAE and MSE as metrics.
model.compile(
    loss='mse',
    optimizer=tf.keras.optimizers.RMSprop(0.001),
    metrics=['mae', 'mse'],
)
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense (Dense)                (None, 64)                640       
_________________________________________________________________
dense_1 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 65        
=================================================================
Total params: 4,865
Trainable params: 4,865
Non-trainable params: 0
_________________________________________________________________
# Smoke test: run the model (not yet fitted) on the first 10 standardized
# training rows and print its raw predictions.
example_batch = normed_train_data[:10]
example_result = model.predict(example_batch)
print(example_result)
[[ 0.06187941]
 [ 0.16284567]
 [ 0.19416149]
 [ 0.3226478 ]
 [ 0.09883147]
 [ 0.00343724]
 [ 0.13330291]
 [ 0.62984717]
 [-0.05348695]
 [ 0.44078857]]
# Show training progress by printing one dot for every completed epoch.
class PrintDot(keras.callbacks.Callback):
    """Keras callback that prints a dot at the end of each epoch."""

    def on_epoch_end(self, epoch, logs=None):
        # Start a new output line whenever the epoch index is a multiple of 100.
        if not epoch % 100:
            print(' ')
        print('.', end=' ')
# Train for a fixed 1000 epochs and keep the returned history object.
history=model.fit(normed_train_data,
                  train_labels,
                  epochs=1000,
                  validation_split=0.2, # hold out 20% of the training data as a validation set
                  verbose=0,  # do not show the progress bar
                  callbacks=[PrintDot()]) # report progress through the PrintDot callback
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .  
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .  
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .  
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .  
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .  
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .  
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .  
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .  
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .  
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
# Put the recorded per-epoch values into a DataFrame for inspection.
hist=pd.DataFrame(history.history)
hist['epoch']=history.epoch # add an 'epoch' column
hist.tail()
loss mae mse val_loss val_mae val_mse epoch
995 2.448885 0.975710 2.448885 9.030066 2.268713 9.030066 995
996 2.376843 0.999163 2.376843 9.096817 2.273271 9.096817 996
997 2.383884 0.992754 2.383883 9.657296 2.356696 9.657296 997
998 2.504148 1.021134 2.504148 9.152325 2.318949 9.152325 998
999 2.421463 0.947287 2.421463 9.146635 2.284075 9.146635 999
def plot_history(history):
    """Plot training and validation error curves against the epoch number.

    Reads the per-epoch values from ``history.history`` and the epoch numbers
    from ``history.epoch`` (in this file, the object returned by
    ``model.fit``). Draws one figure for mean absolute error (y-axis 0 to 5)
    and one for mean squared error (y-axis 0 to 20), then shows them.
    """
    records = pd.DataFrame(history.history)
    records['epoch'] = history.epoch

    # (metric key, y-axis label, upper y-axis limit) per figure, in draw order.
    panels = (
        ('mae', 'Mean Abs Error [MPG]', 5),
        ('mse', 'Mean Square Error [$MPG^2$]', 20),
    )
    for metric, y_label, y_max in panels:
        plt.figure()
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        # The label is the name shown for the curve in the legend.
        plt.plot(records['epoch'], records[metric], label='Train Error')
        plt.plot(records['epoch'], records['val_' + metric], label='Val Error')
        plt.ylim([0, y_max])
        plt.legend()
    plt.show()

plot_history(history) # the chart shows that after about 100 epochs the error stops improving and instead gets worse

# Show the feature columns left in the training frame (MPG was popped into train_labels earlier).
train_dataset.keys()
Index(['Cylinders', 'Displacement', 'Horsepower', 'Weight', 'Acceleration',
       'Model Year', 'USA', 'Europe', 'Japan'],
      dtype='object')
# Build and compile a new model with the same architecture and settings as
# before, so this run does not continue from the previously trained weights.
model = keras.models.Sequential([
    layers.Dense(64, activation='relu',
                 input_shape=[len(train_dataset.keys())]),
    layers.Dense(64, activation='relu'),
    layers.Dense(1),
])
model.compile(
    loss='mse',
    optimizer=tf.keras.optimizers.RMSprop(0.001),
    metrics=['mae', 'mse'],
)

# Same fit call as before, plus early stopping.
history = model.fit(
    normed_train_data,
    train_labels,
    epochs=1000,
    validation_split=0.2,
    verbose=0,
    callbacks=[
        # Stop once val_loss has gone 10 epochs (patience) without improving.
        # min_delta is left at its default; per the Keras docs that is 0, so
        # any decrease counts as an improvement.
        keras.callbacks.EarlyStopping(monitor='val_loss', patience=10),
        PrintDot(),
    ],
)
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
# Plot the learning curves of the run that used early stopping.
plot_history(history)

# Scatter the model's predictions on the test set against the true MPG values.
test_predictions = model.predict(normed_test_data).flatten() # flatten the predictions to 1-D
plt.scatter(test_labels,test_predictions)
plt.xlabel('True values [MPG]')
plt.ylabel("Predictions [MPG]")
plt.axis('equal') # equal scaling on both axes
plt.axis('square')
# Start both axes at 0 while keeping their current upper limits.
plt.xlim([0,plt.xlim()[1]])
plt.ylim([0,plt.ylim()[1]])
_ = plt.plot([-100,100],[-100,100]) # draw a line segment through (-100,-100) and (100,100)

# Distribution of the prediction errors (prediction minus true value) on the test set.
error = test_predictions-test_labels
plt.hist(error, bins=25) # histogram with 25 bins
plt.xlabel('Prediction Error [MPG]')
_ = plt.ylabel("Count")

发布了81 篇原创文章 · 获赞 27 · 访问量 1万+

猜你喜欢

转载自blog.csdn.net/qq_43309286/article/details/104553667