added composite changes

2026-04-29 17:35:16 +00:00 · 2025-03-25 05:28:44 +00:00 · 2025-03-25 05:28:44 +00:00 · a8b1408967
commit a8b1408967
parent 4a37dbb5c1
1 changed file with 1 addition and 1 deletion
--- a/training/configs/qwen2.5_3b_grpo_composite.yaml
+++ b/training/configs/qwen2.5_3b_grpo_composite.yaml
@@ -118,6 +118,7 @@ actor_rollout_ref:
    # for hf rollout
    do_sample: True
    use_fire_sampling: False
+    max_model_len: 4096
    # number of responses (i.e. num sample times)
    n: 8 # > 1 for grpo
    val_kwargs:
@@ -188,7 +189,6 @@ critic:
  shuffle: ${actor_rollout_ref.actor.shuffle}
  grad_clip: 1.0
  cliprange_value: 0.5
-  max_model_len: 4096

 # Reward model not used for GRPO
 reward_model: