diff --git a/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml b/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml index f313bccc..017912d1 100644 --- a/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml +++ b/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml @@ -134,7 +134,7 @@ actor_rollout_ref: enforce_eager: True free_cache_engine: True load_format: dummy_dtensor - tensor_model_parallel_size: 2 + tensor_model_parallel_size: 1 max_num_batched_tokens: 12288 max_num_seqs: 1024 log_prob_micro_batch_size: null # will be deprecated, use log_prob_micro_batch_size_per_gpu @@ -171,7 +171,7 @@ trainer: logger: [ 'console', 'wandb' ] val_generations_to_log_to_wandb: 0 nnodes: 1 - n_gpus_per_node: 4 + n_gpus_per_node: 1 save_freq: 100 # auto: find the last ckpt to resume. If can't find, start from scratch resume_mode: auto # or auto or resume_path if