**不多说,直接上代码。** 为了防止大家直接照抄而不去学习,我决定不把相关库的导入语句放上来,代码中用到的方法属于哪个库,自己上网搜索即可。
def load_data(data_path=None, label_path=None, *, test_size=0.25, random_state=81):
    """Load features and labels from CSV and return a stratified train/test split.

    The feature table and the label table are joined on the ``USRID`` column,
    ``USRID`` is then dropped, and the ``FLAG`` column is used as the label.
    Every remaining column is treated as a feature.

    Args:
        data_path: Path (or file-like object) of the feature CSV; anything
            accepted by ``pd.read_csv``. For other file formats use the
            corresponding ``pd.read_*`` reader instead.
        label_path: Path (or file-like object) of the label CSV. It must
            contain the ``USRID`` and ``FLAG`` columns.
        test_size: Passed through to ``train_test_split``.
        random_state: Seed passed through to ``train_test_split``.

    Returns:
        The result of ``train_test_split``: ``X_train, X_test, y_train,
        y_test`` (features first, then labels), stratified on the label array.

    Raises:
        TypeError: If ``data_path`` or ``label_path`` is not supplied.
    """
    if data_path is None or label_path is None:
        # The original called pd.read_csv() with no argument, which can only
        # fail; surface the missing input with an explicit message instead.
        raise TypeError(
            "load_data() requires both data_path and label_path "
            "(paths of the feature CSV and the label CSV)"
        )

    features = pd.read_csv(data_path)
    labels = pd.read_csv(label_path)

    # NOTE(review): how="left" keeps feature rows that have no matching label;
    # their FLAG would be NaN. Confirm every USRID has a label.
    merged = features.merge(labels, how="left", on="USRID").drop(columns=["USRID"])

    feature_columns = [name for name in merged.columns if name != "FLAG"]
    feature_array = merged[feature_columns].values  # feature matrix
    label_array = merged["FLAG"].values  # label vector

    # Stratify so both splits keep the same class proportions as the labels.
    return train_test_split(
        feature_array,
        label_array,
        test_size=test_size,
        random_state=random_state,
        stratify=label_array,
    )
def test_decision_tree(*data):
    """Fit an entropy decision tree (max depth 5) and print its scores.

    Prints the training score, a separator line, the testing score, and the
    ROC AUC on the test split.

    Args:
        *data: Exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.
    """
    train_x, test_x, train_y, test_y = data

    tree = DecisionTreeClassifier(criterion="entropy", max_depth=5)
    tree.fit(train_x, train_y)

    train_score = tree.score(train_x, train_y)
    print("decision_tree:training score:{:.4f}".format(train_score))
    print('-' * 30)
    test_score = tree.score(test_x, test_y)
    print("decision_tree:testing score:{:.4f}".format(test_score))

    # Column 1 of predict_proba is used as the ranking score for ROC AUC
    # (assumes binary labels -- TODO confirm).
    positive_proba = tree.predict_proba(test_x)[:, 1]
    print("decision_tree_auc_test", roc_auc_score(test_y, positive_proba))
def test_adaboost(*data):
    """Fit a default AdaBoostClassifier and print its scores.

    Prints the training score, the testing score, and the ROC AUC on the
    test split.

    Args:
        *data: Exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.
    """
    train_x, test_x, train_y, test_y = data

    booster = AdaBoostClassifier()
    booster.fit(train_x, train_y)

    train_score = booster.score(train_x, train_y)
    print("adaboost:training score:{:.4f}".format(train_score))
    test_score = booster.score(test_x, test_y)
    print("adaboost:testing score:{:.4f}".format(test_score))

    # Column 1 of predict_proba is used as the ranking score for ROC AUC
    # (assumes binary labels -- TODO confirm).
    positive_proba = booster.predict_proba(test_x)[:, 1]
    print("adaboost_auc_test", roc_auc_score(test_y, positive_proba))
def test_logistic(*data):
    """Fit a liblinear logistic regression and print its scores.

    Prints the training score, the testing score, and the ROC AUC on the
    test split.

    Args:
        *data: Exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.
    """
    train_x, test_x, train_y, test_y = data

    model = linear_model.LogisticRegression(solver="liblinear")
    model.fit(train_x, train_y)

    train_score = model.score(train_x, train_y)
    print("logistic:training score:{:.4f}".format(train_score))
    test_score = model.score(test_x, test_y)
    print("logistic:testing score:{:.4f}".format(test_score))

    # Column 1 of predict_proba is used as the ranking score for ROC AUC
    # (assumes binary labels -- TODO confirm).
    positive_proba = model.predict_proba(test_x)[:, 1]
    print("logistic_auc_test", roc_auc_score(test_y, positive_proba))
def test_rf(*data):
    """Fit an entropy random forest and print its scores.

    Prints the training score, the testing score, the ROC AUC on the test
    split, and finally the ROC AUC on the training split.

    Args:
        *data: Exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.
    """
    train_x, test_x, train_y, test_y = data

    forest = RandomForestClassifier(criterion="entropy")
    forest.fit(train_x, train_y)

    train_score = forest.score(train_x, train_y)
    print("rf:training score:{:.4f}".format(train_score))
    test_score = forest.score(test_x, test_y)
    print("rf:testing score:{:.4f}".format(test_score))

    # Column 1 of predict_proba is used as the ranking score for ROC AUC
    # (assumes binary labels -- TODO confirm).
    test_proba = forest.predict_proba(test_x)[:, 1]
    print("rf_auc_test", roc_auc_score(test_y, test_proba))

    # The same metric on the data the forest was fitted on, printed last.
    train_proba = forest.predict_proba(train_x)[:, 1]
    print("rf_auc_train", roc_auc_score(train_y, train_proba))
如果代码对你们有帮助,麻烦点个赞;有什么问题可以直接在评论区留言,我看到就会回复。希望能帮到大家,代码有什么需要改进的地方尽管说,我也要多多学习呢~