高级编程技术,第十五周

作业题目如下:


代码如下:

1.库的引用

from sklearn import datasets,cross_validation
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
2.交叉检验划分
# Cross-validation setup.
# NOTE(review): sklearn.cross_validation was deprecated in 0.18 and removed in
# 0.20; its replacement is sklearn.model_selection. The KFold constructor also
# changed: it now takes n_splits (not the sample count + n_folds) and yields
# (train_index, test_index) pairs via .split(X).
from sklearn.model_selection import KFold

# Synthetic binary classification dataset: 2000 samples, 15 features
# (make_classification defaults to n_classes=2, so the binary-only metrics
# used below — f1_score, roc_auc_score — are valid).
dataset = datasets.make_classification(n_samples=2000, n_features=15)
data, target = dataset[0], dataset[1]

# Bind kf to the generator returned by .split(data): it yields
# (train_index, test_index) tuples, so the existing
# `for train_index, test_index in kf:` loop below works unchanged.
kf = KFold(n_splits=10, shuffle=True).split(data)

3.机器学习部分代码

# For every cross-validation fold, fit three classifiers and report
# accuracy, F1 and ROC-AUC on the held-out split.
for fold_no, (train_index, test_index) in enumerate(kf, start=1):
    data_train, target_train = data[train_index], target[train_index]
    data_test, target_test = data[test_index], target[test_index]
    print("Test:", fold_no)

    # (label, fresh estimator) pairs — a new model is fitted on each fold.
    models = [
        ("GaussianNB:", GaussianNB()),
        ("SVC:", SVC(C=1e-01, kernel='rbf', gamma=0.1)),
        ("RandomForestClassifier:", RandomForestClassifier(n_estimators=100)),
    ]
    for label, clf in models:
        print(label)
        clf.fit(data_train, target_train)
        pred = clf.predict(data_test)
        print("Accuracy:", metrics.accuracy_score(target_test, pred))
        print("F1-score:", metrics.f1_score(target_test, pred))
        print("AUC ROC:", metrics.roc_auc_score(target_test, pred))
    print()

输出结果:


 

通过实验可知,在二分类问题中,随机森林算法的效果比朴素贝叶斯和支持向量机都更好。

猜你喜欢

转载自blog.csdn.net/qq_36319729/article/details/80734083
今日推荐