# 超参数调优进阶:Optuna/Bayesian/Early Stopping

## 1. 调优方法对比

```text
超参数调优方法:
├── 网格搜索(Grid Search):穷举所有组合,慢但全面
├── 随机搜索(Random Search):随机采样,快但不保证最优
├── 贝叶斯优化(Bayesian):基于历史结果智能搜索
└── 早停法(Early Stopping):训练中动态停止
```

## 2. Optuna 调优

注:以下示例假设 `X_train`、`y_train`(以及第 4 节的 `X_val`、`y_val`)已提前准备好;第 3 节沿用本节导入的 `cross_val_score`。

```python
import optuna
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score


def objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 300),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
        'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2', None]),
    }
    model = RandomForestClassifier(**params, random_state=42)
    scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
    return scores.mean()


study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100, show_progress_bar=True)

print(f"最佳参数: {study.best_params}")
print(f"最佳分数: {study.best_value:.4f}")
```

## 3. XGBoost + Optuna

```python
import optuna
import xgboost as xgb


def objective_xgb(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 500),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 10.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 10.0, log=True),
    }
    model = xgb.XGBClassifier(**params, random_state=42, use_label_encoder=False)
    scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
    return scores.mean()


study = optuna.create_study(direction='maximize')
study.optimize(objective_xgb, n_trials=200)
```

## 4. Early Stopping

```python
import lightgbm as lgb

train_data = lgb.Dataset(X_train, label=y_train)
val_data = lgb.Dataset(X_val, label=y_val, reference=train_data)

params = {
    'objective': 'binary',
    'metric': 'binary_logloss',
    'learning_rate': 0.05,
    'num_leaves': 31,
}

callbacks = [
    lgb.early_stopping(stopping_rounds=50),
    lgb.log_evaluation(period=10),
]

model = lgb.train(
    params,
    train_data,
    valid_sets=[val_data],
    num_boost_round=1000,
    callbacks=callbacks,
)
```

## 总结

| 方法 | 速度 | 精度 | 推荐场景 |
| --- | --- | --- | --- |
| Grid Search | 慢 | 高 | 小参数空间 |
| Random Search | 快 | 中 | 快速探索 |
| Optuna | 中 | 高 | 复杂参数空间 |
| Early Stopping | 快 | 高 | 训练中使用 |