From fee4e37ae4c5cf4bc96a6aa26de3df6e8c583dda Mon Sep 17 00:00:00 2001
From: joesharratt1229
Date: Tue, 25 Mar 2025 05:38:07 +0000
Subject: [PATCH] added composite changes

---
 .../configs/qwen2.5_3b_grpo_composite.yaml | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/training/configs/qwen2.5_3b_grpo_composite.yaml b/training/configs/qwen2.5_3b_grpo_composite.yaml
index 1c51e0bc..3c489ffe 100644
--- a/training/configs/qwen2.5_3b_grpo_composite.yaml
+++ b/training/configs/qwen2.5_3b_grpo_composite.yaml
@@ -30,7 +30,7 @@ reward:
   use_accuracy: True
   secondary_rewards:
     - name: format
-      scaling_factor: 0.5
+      scaling_factor: 0.2

 data:
   tokenizer: null
@@ -39,8 +39,8 @@ data:
   prompt_key: prompt
   max_prompt_length: 512
   max_response_length: 1024
-  train_batch_size: 128
-  val_batch_size: 128
+  train_batch_size: 64
+  val_batch_size: 64
   return_raw_chat: True
   return_raw_input_ids: True

@@ -56,7 +56,7 @@ actor_rollout_ref:
     strategy: fsdp # This is for backward-compatibility
     ppo_mini_batch_size: 32
     ppo_micro_batch_size: null # will be deprecated, use ppo_micro_batch_size_per_gpu
-    ppo_micro_batch_size_per_gpu: 8
+    ppo_micro_batch_size_per_gpu: 160
     use_dynamic_bsz: False
     ppo_max_token_len_per_gpu: 12288 # n * ${data.max_prompt_length} + ${data.max_response_length}
     grad_clip: 1.0
@@ -70,9 +70,9 @@ actor_rollout_ref:
     ulysses_sequence_parallel_size: 1 # sp size
     optim:
       lr: 1e-6
-      lr_warmup_steps_ratio: 0.1 # the total steps will be injected during runtime
-      min_lr_ratio: 0.1 # only useful for warmup with cosine
-      warmup_style: cosine # select from constant/cosine
+      lr_warmup_steps_ratio: 0 # the total steps will be injected during runtime
+      min_lr_ratio: null # only useful for warmup with cosine
+      warmup_style: constant # select from constant/cosine
       total_training_steps: -1 # must be override by program
     fsdp_config:
       wrap_policy:
@@ -88,7 +88,7 @@ actor_rollout_ref:
         # transformer_layer_cls_to_wrap: None
         min_num_params: 0
     log_prob_micro_batch_size: null # will be deprecated, use log_prob_micro_batch_size_per_gpu
-    log_prob_micro_batch_size_per_gpu: 16
+    log_prob_micro_batch_size_per_gpu: 160
     log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
     log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
     ulysses_sequence_parallel_size: ${actor_rollout_ref.actor.ulysses_sequence_parallel_size} # sp size
@@ -106,7 +106,7 @@ actor_rollout_ref:
     enforce_eager: True
     free_cache_engine: True
     load_format: dummy_dtensor
-    tensor_model_parallel_size: 2
+    tensor_model_parallel_size: 4
     max_num_batched_tokens: 8192
     max_num_seqs: 1024
     log_prob_micro_batch_size: null # will be deprecated, use log_prob_micro_batch_size_per_gpu
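
Note on the composite-reward change: with use_accuracy: True, the format
reward is a secondary term whose weight drops from 0.5 to 0.2 here. A
minimal Python sketch of how such a composite reward is plausibly combined,
assuming the trainer sums the primary accuracy term with each secondary
term weighted by its scaling_factor (function and variable names are
illustrative, not verl's actual API):

    def composite_reward(accuracy_reward, secondary_rewards, scaling_factors):
        """Primary accuracy term plus scaled secondary terms."""
        total = accuracy_reward  # primary term, enabled by `use_accuracy: True`
        for name, value in secondary_rewards.items():
            # each secondary reward is weighted by its configured scaling_factor
            total += scaling_factors.get(name, 0.0) * value
        return total

    # After this patch, a correct and well-formatted response scores
    # 1.0 + 0.2 * 1.0 = 1.2 (previously 1.0 + 0.5 * 1.0 = 1.5), so format
    # compliance contributes less to the reward relative to accuracy.
    print(composite_reward(1.0, {"format": 1.0}, {"format": 0.2}))  # 1.2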