As usual, let's start with the source code:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor

param_grid = [
    {'n_eatimatiors': [3, 10, 30], 'max_features': [2, 4, 6, 8]},
    {'bootstrap': [False], 'n_estimators': [3, 10], 'max_features': [2, 3, 4]},
]
forest_reg = RandomForestRegressor(random_state=42)
grid_search = GridSearchCV(forest_reg, param_grid, cv=5,
                           scoring='neg_mean_squared_error',
                           return_train_score=True)
grid_search.fit(housing_prepared, housing_labels)
What this code is for and what it does: model tuning with grid search.
1. Tuning hyperparameters by hand makes it hard to find a good combination.
2. Let GridSearchCV do the searching for you: tell it which hyperparameters to experiment with and which values to try, and it will use cross-validation to evaluate every possible combination (see the sketch below).
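To get a feel for how much work this search involves, here is a minimal sketch (assuming the param_grid defined above) that counts the candidate combinations with sklearn's ParameterGrid:

from sklearn.model_selection import ParameterGrid

# The first dict contributes 3 * 4 = 12 combinations,
# the second contributes 1 * 2 * 3 = 6, so 18 candidates in total;
# with cv=5 that means 18 * 5 = 90 training runs.
print(len(ParameterGrid(param_grid)))   # 18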
Running the code above produces the following error:
ValueError Traceback (most recent call last)
<ipython-input-353-54613827b5ad> in <module>
8 grid_search=GridSearchCV(forest_reg,param_grid,cv=5,
9 scoring='neg_mean_squared_error',return_train_score=True)
---> 10 grid_search.fit(housing_prepared,housing_labels)
d:\python3.8.5\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
70 FutureWarning)
71 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 72 return f(**kwargs)
73 return inner_f
74
d:\python3.8.5\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
734 return results
735
--> 736 self._run_search(evaluate_candidates)
737
738 # For multi-metric evaluation, store the best_index_, best_params_ and
d:\python3.8.5\lib\site-packages\sklearn\model_selection\_search.py in _run_search(self, evaluate_candidates)
1186 def _run_search(self, evaluate_candidates):
1187 """Search all candidates in param_grid"""
-> 1188 evaluate_candidates(ParameterGrid(self.param_grid))
1189
1190
d:\python3.8.5\lib\site-packages\sklearn\model_selection\_search.py in evaluate_candidates(candidate_params)
706 n_splits, n_candidates, n_candidates * n_splits))
707
--> 708 out = parallel(delayed(_fit_and_score)(clone(base_estimator),
709 X, y,
710 train=train, test=test,
d:\python3.8.5\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
1046 # remaining jobs.
1047 self._iterating = False
-> 1048 if self.dispatch_one_batch(iterator):
1049 self._iterating = self._original_iterator is not None
1050
d:\python3.8.5\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
864 return False
865 else:
--> 866 self._dispatch(tasks)
867 return True
868
d:\python3.8.5\lib\site-packages\joblib\parallel.py in _dispatch(self, batch)
782 with self._lock:
783 job_idx = len(self._jobs)
--> 784 job = self._backend.apply_async(batch, callback=cb)
785 # A job can complete so quickly than its callback is
786 # called before we get here, causing self._jobs to
d:\python3.8.5\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback)
206 def apply_async(self, func, callback=None):
207 """Schedule a func to be run"""
--> 208 result = ImmediateResult(func)
209 if callback:
210 callback(result)
d:\python3.8.5\lib\site-packages\joblib\_parallel_backends.py in __init__(self, batch)
570 # Don't delay the application, to avoid keeping the input
571 # arguments in memory
--> 572 self.results = batch()
573
574 def get(self):
d:\python3.8.5\lib\site-packages\joblib\parallel.py in __call__(self)
260 # change the default number of processes to -1
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262 return [func(*args, **kwargs)
263 for func, args, kwargs in self.items]
264
d:\python3.8.5\lib\site-packages\joblib\parallel.py in <listcomp>(.0)
260 # change the default number of processes to -1
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262 return [func(*args, **kwargs)
263 for func, args, kwargs in self.items]
264
d:\python3.8.5\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
518 cloned_parameters[k] = clone(v, safe=False)
519
--> 520 estimator = estimator.set_params(**cloned_parameters)
521
522 start_time = time.time()
d:\python3.8.5\lib\site-packages\sklearn\base.py in set_params(self, **params)
247 key, delim, sub_key = key.partition('__')
248 if key not in valid_params:
--> 249 raise ValueError('Invalid parameter %s for estimator %s. '
250 'Check the list of available parameters '
251 'with `estimator.get_params().keys()`.' %
ValueError: Invalid parameter n_eatimatiors for estimator RandomForestRegressor(max_features=2, random_state=42). Check the list of available parameters with `estimator.get_params().keys()`
The traceback is very long, but the key is in the last few lines: line 249, pointed to by the arrow, and the final error message.
247 key, delim, sub_key = key.partition('__')
248 if key not in valid_params:
--> 249 raise ValueError('Invalid parameter %s for estimator %s. '
250 'Check the list of available parameters '
251 'with `estimator.get_params().keys()`.' %
ValueError: Invalid parameter n_eatimatiors for estimator RandomForestRegressor(max_features=2, random_state=42). Check the list of available parameters with `estimator.get_params().keys()`.
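The message even tells you how to verify the valid names. A quick check (a sketch, reusing the forest_reg defined above) looks like this:

# List the hyperparameter names RandomForestRegressor actually accepts;
# 'n_estimators' is in the list, 'n_eatimatiors' is not.
print(sorted(forest_reg.get_params().keys()))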
Analyze this error carefully. First locate the offending name: 'n_eatimatiors'. The reason given is 'Invalid parameter', so the conclusion is that 'n_eatimatiors' is not a valid parameter name. Now find 'n_eatimatiors' in the code and check whether it is spelled correctly:
param_grid = [
    {'n_eatimatiors': [3, 10, 30], 'max_features': [2, 4, 6, 8]},
    {'bootstrap': [False], 'n_estimators': [3, 10], 'max_features': [2, 3, 4]},
]
There is indeed a spelling problem: 'n_eatimatiors' should be 'n_estimators'.
param_grid = [
    {'n_estimators': [3, 10, 30], 'max_features': [2, 4, 6, 8]},
    {'bootstrap': [False], 'n_estimators': [3, 10], 'max_features': [2, 3, 4]},
]
Re-running now works; fit() returns the fitted search object:
GridSearchCV(cv=5, estimator=RandomForestRegressor(random_state=42),
             param_grid=[{'max_features': [2, 4, 6, 8],
                          'n_estimators': [3, 10, 30]},
                         {'bootstrap': [False], 'max_features': [2, 3, 4],
                          'n_estimators': [3, 10]}],
             return_train_score=True, scoring='neg_mean_squared_error')
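With the search finished, the usual next step (not shown in the original run, so the printed values are illustrative only) is to inspect the best combination and the cross-validation scores:

import numpy as np

# Best hyperparameter combination found, and the refitted estimator
print(grid_search.best_params_)
print(grid_search.best_estimator_)

# Scores are negative MSE, so negate and take the square root to get RMSE
cvres = grid_search.cv_results_
for mean_score, params in zip(cvres['mean_test_score'], cvres['params']):
    print(np.sqrt(-mean_score), params)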
Takeaway: you will constantly run into problems while writing code. Learning to analyze and solve them is an important skill, and one worth practicing deliberately.