代码和结果如下:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
import seaborn as sns


def self_func(i):
    """Fit two linear regressions that differ only in their noise level.

    Generates 101 evenly spaced x values on [0, 10] and a single standard
    normal noise vector seeded by ``i``. Two targets are built from the same
    line ``y = 2.5 * x + 0.8``: one with the noise scaled by 20.0 and one
    with the noise scaled by 2.0. A ``LinearRegression`` is fitted to each.

    Args:
        i: Seed passed to ``np.random.seed`` so each call is reproducible.

    Returns:
        A two-row DataFrame with columns ``cate`` and ``coef_``: row
        ``cate == 0`` holds the fitted slope for the high-noise target and
        row ``cate == 1`` holds the slope for the low-noise target.
    """
    np.random.seed(i)
    n = 101
    x = np.linspace(0, 10, n)
    noise = np.random.randn(n)

    # Both targets share the same noise draw; only its scale differs.
    y1 = 2.5 * x + 0.8 + 20.0 * noise
    y2 = 2.5 * x + 0.8 + 2.0 * noise

    # scikit-learn expects a 2-D feature matrix of shape (n_samples, 1).
    features = x.reshape(-1, 1)
    lr1 = LinearRegression()
    lr1.fit(features, y1)
    lr2 = LinearRegression()
    lr2.fit(features, y2)

    # coef_ is a length-1 array here; index it explicitly instead of relying
    # on the deprecated implicit array-to-scalar conversion.
    return pd.DataFrame(
        [[0, float(lr1.coef_[0])], [1, float(lr2.coef_[0])]],
        columns=['cate', 'coef_'],
    )


# NOTE: seed 0 is sampled twice (the initial frame plus i == 0 below), which
# gives 101 rows per category. This is kept so the output matches the
# originally recorded results.
# DataFrame.append was removed in pandas 2.0; collect the frames and
# concatenate once instead of growing the frame inside the loop.
frames = [self_func(0)] + [self_func(i) for i in range(100)]
result = pd.concat(frames, ignore_index=True)

sns.stripplot(x="cate", y="coef_", data=result, jitter=True)

df1 = result[result['cate'] == 0]
df2 = result[result['cate'] == 1]
print(df1.describe())
print(df2.describe())
结果:
        cate       coef_
count  101.0  101.000000
mean     0.0    2.658496
std      0.0    0.641301
min      0.0    1.183393
25%      0.0    2.146773
50%      0.0    2.584440
75%      0.0    3.147349
max      0.0    4.042992
        cate       coef_
count  101.0  101.000000
mean     1.0    2.515850
std      0.0    0.064130
min      1.0    2.368339
25%      1.0    2.464677
50%      1.0    2.508444
75%      1.0    2.564735
max      1.0    2.654299