diff --git a/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml b/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml index 26cd4989..3fb47c6c 100644 --- a/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml +++ b/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml @@ -64,8 +64,8 @@ data: prompt_key: prompt max_prompt_length: 4096 max_response_length: 2048 - train_batch_size: 32 - val_batch_size: 64 + train_batch_size: 16 + val_batch_size: 8 return_raw_chat: True return_raw_input_ids: True @@ -79,9 +79,9 @@ actor_rollout_ref: use_remove_padding: True actor: strategy: fsdp # This is for backward-compatibility - ppo_mini_batch_size: 32 + ppo_mini_batch_size: 4 ppo_micro_batch_size: null # will be deprecated, use ppo_micro_batch_size_per_gpu - ppo_micro_batch_size_per_gpu: 8 + ppo_micro_batch_size_per_gpu: 4 use_dynamic_bsz: False ppo_max_token_len_per_gpu: 49152 # n * ${data.max_prompt_length} + ${data.max_response_length} grad_clip: 1.0