This commit is contained in:
Oliver 2025-04-28 22:32:10 +01:00
parent f57b5adcb0
commit 8dd7c86368

View file

@ -134,7 +134,7 @@ actor_rollout_ref:
enforce_eager: True
free_cache_engine: True
load_format: dummy_dtensor
tensor_model_parallel_size: 2
tensor_model_parallel_size: 1
max_num_batched_tokens: 12288
max_num_seqs: 1024
log_prob_micro_batch_size: null # will be deprecated, use log_prob_micro_batch_size_per_gpu
@ -171,7 +171,7 @@ trainer:
logger: [ 'console', 'wandb' ]
val_generations_to_log_to_wandb: 0
nnodes: 1
n_gpus_per_node: 4
n_gpus_per_node: 1
save_freq: 100
# auto: find the last ckpt and resume from it. If none is found, start from scratch
resume_mode: auto # or resume_path; NOTE(review): the original note was cut off after "if" and listed auto twice - confirm the full set of modes