I am using sklearn along with optuna for HPO. I would like to create a custom function that would take an sklearn pipeline as input and return optuna-specific grids. Returning sklearn specific param grids (i.e. dictionaries) seems to be more straightforward (duh) ; this is what I 've got so far :
def grid_from_estimator(estimator, type = 'sklearn'):
estimator_name = estimator.named_steps['estimator'].__class__.__name__
if type == 'sklearn':
if estimator_name=='LogisticRegression':
params = {
'estimator__penalty': ['l1','elasticnet'],
'estimator__C': np.logspace(-4, 4, 20)
}
elif estimator_name=='LGBMClassifier':
params = {
'estimator__n_estimators': np.arange(100, 1000, 200),
'estimator__boosting_type':['gbdt','dart'],
'estimator__max_depth': np.arange(6, 12),
'estimator__num_leaves': np.arange(30, 150,5),
'estimator__learning_rate': [1e-2/2 , 1e-2, 1e-1/2, 1e-1, 0.5, 1],
'estimator__min_child_samples': np.arange(20, 100, 5),
'estimator__subsample': np.arange(0.65, 1, 0.05),
'estimator__colsample_bytree': np.arange(0.4, 0.75, 0.05),
'estimator__reg_alpha': [0, 1e-1, 1, 2, 5, 7, 10, 50, 100],
'estimator__reg_lambda': [0, 1e-1, 1, 5, 10, 20, 50, 100],
'estimator__iterations': np.arange(100, 800, 100),
'estimator__objective': 'binary'
}
elif type == 'optuna':
if estimator_name == 'LogisticRegression':
params = {
'estimator__penalty': trial.suggest_categorical('penalty', ['l1', 'elasticnet']),
'estimator__C': trial.suggest.suggest_loguniform('c', -4, 4)
}
elif estimator_name == 'LGBMClassifier':
params = {
'estimator__n_estimators': trial.suggest_int('n_estimators', 100, 1000),
'estimator__boosting_type': trial.suggest_categorical('boosting_type', ['gbdt', 'dart']),
'estimator__max_depth': trial.suggest_int('max_depth', 6, 12),
'estimator__num_leaves': trial.suggest_int('num_leaves', 30, 150, 5),
'estimator__learning_rate': trial.suggest_float('learning_rate', 1e-4, 1),
'estimator__min_child_samples': trial.suggest_int('min_child_samples', 20, 100),
'estimator__subsample': trial.suggest_float('subsample', 0.5, 1),
'estimator__colsample_bytree': trial.suggest_float('colsample_bytree', 0.4, 0.75),
'estimator__reg_alpha': trial.suggest_float('reg_alpha', 1e-2, 10),
'estimator__reg_lambda': trial.suggest_float('reg_lambda', 1e-2, 10)
}
return params
The "trial.suggest_..." parts keeps 'complaining' and returning an error; although I understand the reason why, I can't see any way around it. Is this even possible? Any ideas? Appreciate your support!
An example method using optuna ask and tell interface.
import optuna
import numpy as np
def optuna_objective(estimator_name, params):
if estimator_name == 'LogisticRegression':
x = params['x']
y = params['y']
return (x - 2) ** 2 + y
if estimator_name == 'LGBMClassifier':
# estimator__n_estimators = params['estimator__n_estimators']
# return accuracy
pass
return None
def grid_from_estimator(estimator_name, type_='sklearn', study=None):
params, trial = None, None
if type_ == 'sklearn':
if estimator_name == 'LogisticRegression':
params = {
'estimator__penalty': ['l1','elasticnet'],
'estimator__C': np.logspace(-4, 4, 20)
}
elif estimator_name == 'LGBMClassifier':
params = {
'estimator__n_estimators': np.arange(100, 1000, 200),
'estimator__boosting_type':['gbdt','dart'],
'estimator__max_depth': np.arange(6, 12),
'estimator__num_leaves': np.arange(30, 150,5),
'estimator__learning_rate': [1e-2/2 , 1e-2, 1e-1/2, 1e-1, 0.5, 1],
'estimator__min_child_samples': np.arange(20, 100, 5),
'estimator__subsample': np.arange(0.65, 1, 0.05),
'estimator__colsample_bytree': np.arange(0.4, 0.75, 0.05),
'estimator__reg_alpha': [0, 1e-1, 1, 2, 5, 7, 10, 50, 100],
'estimator__reg_lambda': [0, 1e-1, 1, 5, 10, 20, 50, 100],
'estimator__iterations': np.arange(100, 800, 100),
'estimator__objective': 'binary'
}
elif type_ == 'optuna':
trial = study.ask()
if estimator_name == 'LogisticRegression':
params = {
'x': trial.suggest_float('x', -10, 10),
'y': trial.suggest_float('y', -10, 10)
}
# params = {
# 'estimator__penalty': trial.suggest_categorical('estimator__penalty', ['l1', 'elasticnet']),
# 'estimator__C': trial.suggest_float('estimator__C', -4, 4)
# }
elif estimator_name == 'LGBMClassifier':
params = {
'estimator__n_estimators': trial.suggest_int('estimator__n_estimators', 100, 1000),
'estimator__boosting_type': trial.suggest_categorical('estimator__boosting_type', ['gbdt', 'dart']),
'estimator__max_depth': trial.suggest_int('estimator__max_depth', 6, 12),
'estimator__num_leaves': trial.suggest_int('estimator__num_leaves', 30, 150, 5),
'estimator__learning_rate': trial.suggest_float('estimator__learning_rate', 1e-4, 1),
'estimator__min_child_samples': trial.suggest_int('estimator__min_child_samples', 20, 100),
'estimator__subsample': trial.suggest_float('estimator__subsample', 0.5, 1),
'estimator__colsample_bytree': trial.suggest_float('estimator__colsample_bytree', 0.4, 0.75),
'estimator__reg_alpha': trial.suggest_float('estimator__reg_alpha', 1e-2, 10),
'estimator__reg_lambda': trial.suggest_float('estimator__reg_lambda', 1e-2, 10)
}
return params, trial
# (1) sklearn example
print('SKLEARN')
estimator_name = 'LogisticRegression'
optimizer_type = 'sklearn'
params, _ = grid_from_estimator(estimator_name, type_=optimizer_type)
print(params)
print()
# (2) Optuna example with ask and tell interface.
print('OPTUNA')
study = optuna.create_study(direction='maximize')
n_trials = 10
estimator_name = 'LogisticRegression'
optimizer_type = 'optuna'
for _ in range(n_trials):
params, trial = grid_from_estimator(estimator_name, type_=optimizer_type, study=study)
objective_value = optuna_objective(estimator_name, params)
study.tell(trial, objective_value) # tell the pair of trial and objective value
print(f'trialnum: {trial.number}, params: {params}, value: {objective_value}')
best_params = study.best_params
best_x = best_params["x"]
best_y = best_params["y"]
best_value = study.best_value
best_trial_num = study.best_trial.number
print(f"best x: {best_x}, best y: {best_y}, (x - 2)^2 + y: {(best_x - 2) ** 2 + best_y}, best_value: {best_value}, best_trial_num: {best_trial_num}") # trial num starts at 0
SKLEARN
{'estimator__penalty': ['l1', 'elasticnet'], 'estimator__C': array([1.00000000e-04, 2.63665090e-04, 6.95192796e-04, 1.83298071e-03,
4.83293024e-03, 1.27427499e-02, 3.35981829e-02, 8.85866790e-02,
2.33572147e-01, 6.15848211e-01, 1.62377674e+00, 4.28133240e+00,
1.12883789e+01, 2.97635144e+01, 7.84759970e+01, 2.06913808e+02,
5.45559478e+02, 1.43844989e+03, 3.79269019e+03, 1.00000000e+04])}
OPTUNA
[I 2021-11-25 19:03:09,673] A new study created in memory with name: no-name-f5046b21-f579-4c74-8046-79420c256d4a
trialnum: 0, params: {'x': 2.905894660287128, 'y': -4.537699327718261}, value: -3.7170541921815303
trialnum: 1, params: {'x': -9.275103438355583, 'y': -5.925000918692578}, value: 121.2029566269253
trialnum: 2, params: {'x': -2.9531168045205103, 'y': 5.253730464314739}, value: 29.78709654353821
trialnum: 3, params: {'x': 3.766902399344163, 'y': 3.778408673279479}, value: 6.900352762087639
trialnum: 4, params: {'x': -0.897563829823584, 'y': -0.887774211794973}, value: 7.508101936106943
trialnum: 5, params: {'x': -2.2256917634354645, 'y': 3.8017184220598903}, value: 21.658189301626216
trialnum: 6, params: {'x': -6.333366980619912, 'y': 9.87067058585388}, value: 79.3156758195401
trialnum: 7, params: {'x': 2.570258991787558, 'y': -0.1959178948625162}, value: 0.1292774228520457
trialnum: 8, params: {'x': 2.94430596072913, 'y': 4.318454050149043}, value: 5.210167797617609
trialnum: 9, params: {'x': 5.972023459737699, 'y': 4.165369460555215}, value: 19.942339825261854
best x: -9.275103438355583, best y: -5.925000918692578, (x - 2)^2 + y: 121.2029566269253, best_value: 121.2029566269253, best_trial_num: 1
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With