An example of automatic hyperparameter tuning with pytorch_lightning and optuna
Published on Aug. 22, 2023, 12:11 p.m.
import optuna
import pytorch_lightning as pl
from optuna.integration import PyTorchLightningPruningCallback
from optuna.visualization import plot_optimization_history, plot_parallel_coordinate
from string import Template
from model.myModel import myModel
"""
执行自动调参
"""
# 参考示例 https://www.kaggle.com/code/terrychanorg/optuna-wandb-logger-notebookd54069a901
def objective(trial: optuna.trial.Trial) -> float:
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-3, log=True)
    # max_epochs = trial.suggest_int("max_epochs", 1, 1)
    batch_size = trial.suggest_categorical("batch_size", [64, 128])
    accumulate_grad_batches = trial.suggest_int("accumulate_grad_batches", 1, 10)
    num_labels = trial.suggest_categorical("num_labels", [28000])
    dropout = trial.suggest_categorical("dropout", [0.1, 0.2, 0.3, 0.4])
    hidden_size = trial.suggest_categorical("hidden_size", [128, 256, 512, 768])
    # "MomentumSGD",
    # The parameter is named "optimizer_name" so it matches the ${optimizer_name}
    # placeholder in the config template below.
    optimizer_name = trial.suggest_categorical("optimizer_name", ["Adam", "AdamW", "RMSprop", "Adagrad", "SGD"])
    # optimizer_name = trial.suggest_categorical("optimizer_name", ["AdamW"])
    # Pass the sampled hyperparameters to the model
    model = myModel(learning_rate=learning_rate,
                    batch_size=batch_size,
                    optimizer_name=optimizer_name,
                    num_labels=num_labels,
                    dropout=dropout,
                    hidden_size=hidden_size,
                    data_path="/kaggle/data/out",
                    pretrained="uer/chinese_roberta_L-2_H-512"
                    )
    # print("hparams", model.hparams)
    # Start training
    trainer = pl.Trainer(
        gpus=1,
        min_epochs=1,
        precision=16,
        # amp_level='O2',
        callbacks=[PyTorchLightningPruningCallback(trial, monitor="val_loss")],
        deterministic=True,
        max_epochs=1,
        accumulate_grad_batches=accumulate_grad_batches
    )
    hyperparameters = dict(optimizer_name=optimizer_name,
                           accumulate_grad_batches=accumulate_grad_batches,
                           batch_size=batch_size,
                           lr=learning_rate,
                           dropout=dropout,
                           hidden_size=hidden_size
                           )
    trainer.logger.log_hyperparams(hyperparameters)
    trainer.fit(model)
    return trainer.callback_metrics["val_loss"].item()
study = optuna.create_study(direction="minimize", pruner=optuna.pruners.MedianPruner())
study.optimize(objective, n_trials=100, timeout=6000)
print("Number of finished trials: {}".format(len(study.trials)))
print("Best trial:")
trial = study.best_trial
print(" Value: {}".format(trial.value))
print(" Params: ")
for key, value in trial.params.items():
    print(" {}: {}".format(key, value))
# Collect the best parameters into a dict
best_params = {}
for key, value in trial.params.items():
    print(" {}: {}".format(key, value))
    best_params[key] = value
# best_params
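# Optional: inspect the search with the visualization helpers imported above.
# A minimal sketch -- it assumes plotly is installed (optuna.visualization is built on it);
# the output file names below are just illustrative choices.
fig_history = plot_optimization_history(study)
fig_parallel = plot_parallel_coordinate(study)
fig_history.write_html("optuna_history.html")
fig_parallel.write_html("optuna_parallel_coordinate.html")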
# Automatically save the best parameters to a config file so they can be reused in later training runs; string.Template is used to write the file
s = Template("""
#%%writefile config/config_cuda.yaml
seed_everything: 42
trainer:
  logger:
    - class_path: pytorch_lightning.loggers.WandbLogger
      init_args:
        save_dir: "my_logs"
        offline: false
        project: "zhi"
        log_model: false
        prefix: ''
  checkpoint_callback: true
  callbacks:
    - class_path: pytorch_lightning.callbacks.EarlyStopping
      init_args:
        monitor: val_loss #val_f1
        min_delta: 0.00000
        patience: 150
        verbose: true
        mode: min
        strict: true
        check_finite: true
        check_on_train_epoch_end: false
    - class_path: pytorch_lightning.callbacks.LearningRateMonitor
      init_args:
        logging_interval: step
        log_momentum: false
    - class_path: pytorch_lightning.callbacks.ModelCheckpoint
      init_args:
        filename: '{epoch}-{val_loss:.2f}-{val_acc:.2f}-{val_precision:.2f}'
        monitor: val_loss
        verbose: true
        save_top_k: 1
        save_weights_only: false
        mode: min
        auto_insert_metric_name: true
        save_last: true
  enable_checkpointing: true
  default_root_dir: "out_log"
  gradient_clip_val: null
  gradient_clip_algorithm: null
  process_position: 0
  num_nodes: 1
  num_processes: null
  devices: null
  gpus: [0]
  auto_select_gpus: true
  tpu_cores: null
  ipus: null
  log_gpu_memory: null
  progress_bar_refresh_rate: null
  enable_progress_bar: true
  overfit_batches: 0.0
  track_grad_norm: -1
  check_val_every_n_epoch: 1
  fast_dev_run: false
  accumulate_grad_batches: ${accumulate_grad_batches}
  max_epochs: 300
  min_epochs: null
  max_steps: -1
  min_steps: null
  max_time: "00:07:00:00" # train for at most 7 hours (DD:HH:MM:SS)
  limit_train_batches: null
  limit_val_batches: null
  limit_test_batches: null
  limit_predict_batches: null
  val_check_interval: null
  flush_logs_every_n_steps: null
  log_every_n_steps: 20
  accelerator: null
  strategy: null
  sync_batchnorm: false
  precision: 16
  enable_model_summary: true
  weights_summary: top
  weights_save_path: null
  num_sanity_val_steps: 2
  resume_from_checkpoint: null
  profiler: null
  benchmark: null
  deterministic: false
  reload_dataloaders_every_n_epochs: 1
  auto_lr_find: false
  replace_sampler_ddp: true
  detect_anomaly: false
  auto_scale_batch_size: false
  prepare_data_per_node: null
  plugins: null
  amp_backend: native
  amp_level: null
  move_metrics_to_cpu: false
  multiple_trainloader_mode: max_size_cycle
  stochastic_weight_avg: false
  terminate_on_nan: null
  # log_dir: logs
model:
  learning_rate: ${learning_rate}
  T_max: 5
  optimizer_name: ${optimizer_name}
  batch_size: 128
  data_path: /kaggle/data/out
  pretrained: uer/chinese_roberta_L-2_H-512
  T_mult: 2
  T_0: 500
  num_labels: ${num_labels}
  dropout: ${dropout}
  hidden_size: ${hidden_size}
ckpt_path: null
""")
print(s.safe_substitute(**best_params),
      file=open('config/config_cuda_best.yaml', 'w'))
# !cat config/config_cuda.yaml
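The generated config/config_cuda_best.yaml follows the LightningCLI layout (seed_everything / trainer / model), so it can be passed straight to a LightningCLI entry script. The sketch below is only illustrative and not part of the original post: the script name main.py is hypothetical, and the LightningCLI import path and launch command differ between pytorch_lightning versions.

# main.py -- hypothetical LightningCLI entry point for reusing the tuned config
from pytorch_lightning.utilities.cli import LightningCLI  # newer versions: from pytorch_lightning.cli import LightningCLI
from model.myModel import myModel

if __name__ == "__main__":
    # Older versions:  python main.py --config config/config_cuda_best.yaml
    # Newer versions:  python main.py fit --config config/config_cuda_best.yaml
    cli = LightningCLI(myModel)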
Reference
https://www.notion.so/terrychanorg/pytorch_lightning-optuna-b98f4f71c5b0472880af2cfe655acb9a