import pandas as pd
import matplotlib.pyplot as plt
from pylab import mpl
mpl.rcParams['font.sans-serif'] = ['FangSong'] # 指定默认字体
mpl.rcParams['axes.unicode_minus'] = False # 解决保存图像是负号'-'显示为方块的问题
%matplotlib inline
test=pd.read_excel('1.xlsx')
test.head()
| | 名称 | 熔点(°C) | 沸点(°C) | 相对密度(水=1) |
---|---|---|---|---|
0 | 甲烷 | -182.5 | -161.50 | 0.4200 |
1 | 乙烷 | -183.3 | -88.60 | 0.4500 |
2 | 丙烷 | -187.6 | -42.09 | 0.5005 |
3 | 正丁烷 | -138.4 | -0.50 | 0.5800 |
4 | 正戊烷 | -129.8 | 36.10 | 0.6300 |
# Print a summary of the DataFrame: index range, columns, non-null counts and dtypes.
test.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21 entries, 0 to 20
Data columns (total 4 columns):
名称 21 non-null object
熔点(°C) 21 non-null float64
沸点(°C) 21 non-null float64
相对密度(水=1) 21 non-null float64
dtypes: float64(3), object(1)
memory usage: 752.0+ bytes
按沸点从小到大排序
# Order the rows by boiling point, lowest first, then preview the result.
test = test.sort_values('沸点(°C)', ascending=True)
test.head()
| | 名称 | 熔点(°C) | 沸点(°C) | 相对密度(水=1) |
---|---|---|---|---|
0 | 甲烷 | -182.5 | -161.50 | 0.4200 |
1 | 乙烷 | -183.3 | -88.60 | 0.4500 |
2 | 丙烷 | -187.6 | -42.09 | 0.5005 |
3 | 正丁烷 | -138.4 | -0.50 | 0.5800 |
4 | 正戊烷 | -129.8 | 36.10 | 0.6300 |
# Bar chart of boiling point per compound on a 20x10-inch figure.
plt.figure(figsize=(20, 10))
plt.bar(test['名称'], height=test['沸点(°C)'])
plt.legend(['沸点度数'])
<matplotlib.legend.Legend at 0x25233fd70f0>
![沸点柱状图](output_7_1.png)
# Bar chart of relative density per compound on a 20x10-inch figure.
plt.figure(figsize=(20, 10))
plt.bar(test['名称'], height=test['相对密度(水=1)'])
plt.legend(['密度'])
<matplotlib.legend.Legend at 0x2523449add8>
![相对密度柱状图](output_8_1.png)
# Bar chart of melting point per compound on a 20x10-inch figure.
plt.figure(figsize=(20, 10))
plt.bar(test['名称'], height=test['熔点(°C)'])
plt.legend(['熔点(°C)'])
<matplotlib.legend.Legend at 0x252340acda0>
![熔点柱状图](output_9_1.png)
预测相对密度(水=1)
y = test['相对密度(水=1)']
X = test.drop(['相对密度(水=1)','名称'],axis=1)
print('data shape: {0}; no. positive: {1}; no. negative: {2}'.format(
X.shape, y[y==1].shape[0], y[y==0].shape[0]))
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
data shape: (21, 2); no. positive: 0; no. negative: 0
使用 LinearRegression(线性回归)算法进行预测
from sklearn import linear_model

# Fit a linear regression on the training split; fit() returns the
# estimator itself, so construction and fitting are chained.
model = linear_model.LinearRegression().fit(X_train, y_train)
# Score the fitted model on the training split and on the held-out split.
train_score = model.score(X_train, y_train)
test_score = model.score(X_test, y_test)
print('train score: {0:.6f}; test score: {1:.6f}'.format(train_score, test_score))
train score: 0.932488; test score: 0.771806