题目描述:
代码如下:
"""Compare GaussianNB, SVC and RandomForestClassifier on a synthetic dataset.

The data is split with 10-fold cross validation; the models are trained on
the train/test split produced by the last fold, then evaluated with
accuracy, F1-score and ROC AUC.
"""
from sklearn import datasets
# NOTE(fix): sklearn.cross_validation was removed in scikit-learn 0.20;
# the supported module is sklearn.model_selection.
from sklearn.model_selection import KFold
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics

# Synthetic binary-classification data: 1000 samples, 10 features.
X, y = datasets.make_classification(n_samples=1000, n_features=10)
print("X:", X)
print("Y:", y)

# Modern KFold API: pass n_splits and call .split(X) (the old
# KFold(1000, n_folds=10) constructor no longer exists).
kf = KFold(n_splits=10, shuffle=True)
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    print("\nX_train:", X_train)
    print("X_test:", X_test)
    print("Y_train:", y_train)
    print("Y_test:", y_test)

# The models below are fitted once, on the split left by the last fold
# (this mirrors the original script's behavior).

# Gaussian naive Bayes
print("\nGaussian:")
clf1 = GaussianNB()
clf1.fit(X_train, y_train)
pred1 = clf1.predict(X_test)
print("X的预测值:", pred1)
print("Y的测试值为:", y_test)

# Support vector classifier (RBF kernel)
print("\nSVC:")
clf2 = SVC(C=1e-01, kernel='rbf', gamma=0.1)
clf2.fit(X_train, y_train)
pred2 = clf2.predict(X_test)
print("X的预测值: ", pred2)
print("Y的测试值为:", y_test)

# Random forest
print("\nRandomForestClassifier:")
clf3 = RandomForestClassifier(n_estimators=6)
clf3.fit(X_train, y_train)
pred3 = clf3.predict(X_test)
print("X的预测值: ", pred3)
print("Y的测试值为:", y_test)


def evaluate(y_test, pred, method):
    """Print accuracy, F1-score and ROC AUC for one classifier.

    Args:
        y_test: true labels of the test split.
        pred:   predicted labels for the same split.
        method: display name of the classifier (no trailing colon —
                one is appended when printing).
    """
    acc = metrics.accuracy_score(y_test, pred)
    f1 = metrics.f1_score(y_test, pred)
    auc = metrics.roc_auc_score(y_test, pred)
    print(method + ":")
    print("Accuracy: ", acc)
    print("F1-score: ", f1)
    print("AUC ROC: ", auc)
    print("\n")


print("\n性能评估")
# FIX: the original passed the misspelled label "Guassian:" for all three
# classifiers, so the SVC and RandomForest results were mislabeled.
evaluate(y_test, pred1, "Gaussian")
evaluate(y_test, pred2, "SVC")
evaluate(y_test, pred3, "RandomForestClassifier")
运行结果:
Step 1:
Step 2:
Step 3:
Step 4:
Step 5:
所以从Step 4中可以看出,RandomForestClassifier方法更好