diff --git a/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml b/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml
index b8291b3f..f313bccc 100644
--- a/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml
+++ b/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml
@@ -134,7 +134,7 @@ actor_rollout_ref:
     enforce_eager: True
     free_cache_engine: True
     load_format: dummy_dtensor
-    tensor_model_parallel_size: 4
+    tensor_model_parallel_size: 2
     max_num_batched_tokens: 12288
     max_num_seqs: 1024
     log_prob_micro_batch_size: null # will be deprecated, use log_prob_micro_batch_size_per_gpu