scikit-learn中GridSearchCV的使用:Keras接口

首先请先理解GridSearchCV在scikit-learn中的使用,可见博文:scikit-learn中GridSearchCV的使用:多模型,可视化,该博文中有的子函数,本文不再赘述。


【环境搭建】 

import numpy as np
import pandas as pd
import tensorflow,keras
from keras.models import Sequential
from keras.layers import Dense
from keras.preprocessing import sequence
from keras.optimizers import SGD, RMSprop, Adagrad, Adadelta, Adam, Adamax, Nadam
from keras.wrappers.scikit_learn import KerasClassifier

from sklearn.model_selection import GridSearchCV,cross_val_score
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score
from sklearn.metrics import auc,roc_auc_score,roc_curve,precision_recall_curve

np.random.seed(1231)
x_train,y_train,x_test,y_test = x_train,y_train,x_test,y_test

【学习模型】 

def grid_model(optimizer): #optimizer为要GridSearch的参数,即寻找最好的optimizer
	
	model = Sequential()
	model.add(Dense(196,activation = 'relu'))
	model.add(Dense(1,activation = 'sigmoid'))
	optimizer = optimizer(0.0001)
	model.compile(optimizer = optimizer,loss = 'binary_crossentropy',metrics = ['accuracy','mse'])
  
	return model

【Keras和sklearn的接口】

model = KerasClassifier(build_fn=grid_model, epochs=50, batch_size=64, verbose=1)

【GridSearchCV模型】

optimizer = [SGD, RMSprop, Adagrad, Adadelta, Adam, Adamax, Nadam] #要寻找最佳的optimizer
param_grid = dict(optimizer=optimizer) #转换为GridSearchCV可接受的形式

scoring = {'roc_auc':'roc_auc','accuracy':'accuracy', 'precision':'precision','recall':'recall','f1':'f1'} 

grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=5,verbose=2,scoring=scoring,refit='roc_auc',n_jobs=-1,return_train_score=True) #使用Keras接口包装后的model,即gird

fit = grid.fit(x_train, y_train) #用GridSearchCV的Keras模型grid拟合训练集数据,得到estimator集合fit

result_print(fit,train_seq1,train_y1,test_seq1,test_y1) #打印/输出/展示结果

【子函数】result_print(详见上篇博文)

def result_print(fit,x_train,y_train,x_test,y_test):
	y_train_pred = fit.best_estimator_.predict(x_train) #用best_estimator预测y_train_pred
	y_train_pred_binary = [] #因为是0/1二分类,而sigmoid函数的值域是(0,1),所以要强制分类。
	for item in y_train_pred:
		if item <= 0.5:
			a = 0
		else:
			a = 1
		y_train_pred_binary.append(a)
		
	y_test_pred = fit.best_estimator_.predict(x_test) #用best_estimator预测y_test_pred
	y_test_pred_binary = [] #因为是0/1二分类,而sigmoid函数的值域是(0,1),所以要强制分类。
	for item in y_test_pred:
		if item <= 0.5:
			a = 0
		else:
			a = 1
		y_test_pred_binary.append(a)

	cv_results = pd.DataFrame(fit.cv_results_).set_index(['params'])
	cv_results_mean = cv_results[['mean_train_accuracy', 'mean_train_f1','mean_train_precision', 'mean_train_recall', 'mean_train_roc_auc',
								  'mean_test_accuracy','mean_test_f1', 'mean_test_precision', 'mean_test_recall','mean_test_roc_auc']]
	cv_results_std = cv_results[['std_train_accuracy', 'std_train_f1', 'std_train_precision','std_train_recall', 'std_train_roc_auc',
								 'std_test_accuracy', 'std_test_f1','std_test_precision', 'std_test_recall', 'std_test_roc_auc']]


	print('Best cv_test_roc_auc: %f using %s' % (fit.best_score_,fit.best_params_))
	print(cv_results_mean)
	print(cv_results_std)

	train_score_list = []
	test_score_list = []
	score_list = []
	model_metrics_name = [accuracy_score,precision_score,recall_score,f1_score,roc_auc_score,aupr] 
	for matrix in model_metrics_name:
		train_score = matrix(y_train,y_train_pred_binary)
		test_score = matrix(y_test,y_test_pred_binary)
		train_score_list.append(train_score)
		test_score_list.append(test_score)
	score_list.append(train_score_list)
	score_list.append(test_score_list)
	score_df = pd.DataFrame(score_list,index = ['train','test'],columns = ['accuracy','precision','recall','f1','roc_auc','aupr'])
	print("Best: %f using %s" % (fit.best_score_, fit.best_params_))
	print('EVALUATE_METRICS:')
	print(score_df)

猜你喜欢

转载自blog.csdn.net/weixin_41171061/article/details/83861393