Python3+Tensorflow+泰坦尼克号数据集(数据预处理+预测)

import tensorflow as tf
import numpy as np
import pandas as pd

#数据预处理
def read_data(path='train.csv'):
    """Load a Titanic-style CSV and build one-hot features and labels.

    Every missing value in the file is replaced with 0 before encoding.
    Because this also applies to text columns, a missing entry in a
    categorical column (such as ``Embarked``) ends up as its own category
    in the one-hot output rather than being dropped.

    Args:
        path: Location of the CSV file. Defaults to ``'train.csv'`` in the
            current working directory, which is what the script originally
            hard-coded.

    Returns:
        A ``(datax, datay)`` tuple of DataFrames. ``datax`` holds the columns
        Sex, Age, Pclass, SibSp, Parch, Fare and Embarked after
        ``pd.get_dummies``. ``datay`` holds the two complementary label
        columns ``Survived`` and ``Deceased`` (``Deceased = 1 - Survived``).
    """
    data = pd.read_csv(path)
    data = data.fillna(0)

    feature_columns = ['Sex', 'Age', 'Pclass', 'SibSp', 'Parch', 'Fare', 'Embarked']
    datax = pd.get_dummies(data[feature_columns])

    # Second label column so the target is a two-class one-hot vector.
    data['Deceased'] = 1 - data['Survived']
    datay = data[['Survived', 'Deceased']]
    return datax, datay


if __name__=="__main__":
    # Script entry point: load the data, split it 70/30, train a small fully
    # connected classifier with the TensorFlow 1.x graph API, then print the
    # softmax predictions for the held-out rows.
    new_datax,new_datay=read_data()

    # Drop the pandas labels and work on plain float32 NumPy matrices.
    new_dataxx = new_datax.values.astype(np.float32)
    new_datayy = new_datay.values.astype(np.float32)

    # Shuffle features and labels with ONE shared permutation. Calling
    # np.random.shuffle on each array separately gives them different random
    # orders, which breaks the pairing between a row and its label.
    order = np.random.permutation(len(new_dataxx))
    new_dataxx = new_dataxx[order]
    new_datayy = new_datayy[order]

    # Split the data set: first 70% for training, remaining 30% for testing.
    sep = int(0.7 * len(new_dataxx))
    train_datax = new_dataxx[:sep]
    train_datay = new_datayy[:sep]
    test_datax = new_dataxx[sep:]
    test_datay = new_datayy[sep:]
    # NOTE: sklearn.model_selection.train_test_split would do the shuffle and
    # the split in one call, if scikit-learn is available.

    # Inputs of the fully connected network: tf_inputx carries the feature
    # rows, tf_inputy the one-hot labels. The feature width is taken from the
    # data so it follows whatever columns the one-hot encoding produced.
    feature_count = new_dataxx.shape[1]
    tf_inputx=tf.placeholder(tf.float32, [None, feature_count])
    tf_inputy=tf.placeholder(tf.float32,[None,2])

    # Fully connected network: two ReLU hidden layers and a 2-unit logit layer.
    l1 = tf.layers.dense(tf_inputx, 1000, tf.nn.relu, name="l1")
    l2 = tf.layers.dense(l1, 900, tf.nn.relu, name="l2")
    out = tf.layers.dense(l2, 2, name="l3")

    # Class probabilities (each row sums to 1).
    prediction=tf.nn.softmax(out, name="pred")

    # Cost function: mean softmax cross-entropy between labels and logits.
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tf_inputy, logits=out))
    # Optimizer that minimizes the loss.
    train_step = tf.train.AdamOptimizer(0.001).minimize(loss)
    # Op that initializes all graph variables.
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)
        for i in range(2000):   # number of training iterations
            # One full-batch optimization step on the training split.
            sess.run(train_step, feed_dict={tf_inputx: train_datax, tf_inputy: train_datay})

            # Live feedback: iteration counter and current logits on the training split.
            print("number:"+str(i))
            print(sess.run(out, feed_dict={tf_inputx: train_datax, tf_inputy: train_datay}))

        # After training, print the predicted probabilities for the test split.
        print("test")
        print(sess.run(prediction,feed_dict={tf_inputx:test_datax}))

猜你喜欢

转载自blog.csdn.net/qq_39622065/article/details/80106626