Fix chain sum veRL example for latest veRL (#371)

* fixes for latest veRL * add balance_batch config * 1 -> 2 gpu * tweaks * also add raw ids to server script
2026-04-22 16:49:06 +00:00 · 2025-03-14 19:15:54 +00:00 · 2025-03-14 19:15:54 +00:00 · bd13b1b92a
commit bd13b1b92a
parent 8a0cacc054
5 changed files with 11 additions and 1 deletions
--- a/examples/veRL/chain_sum/config/grpo_trainer.yaml
+++ b/examples/veRL/chain_sum/config/grpo_trainer.yaml
@@ -83,8 +83,11 @@ actor_rollout_ref:
    enable_chunked_prefill: True # could get higher throughput
    # for hf rollout
    do_sample: True
+    use_fire_sampling: False
    # number of responses (i.e. num sample times)
    n: 16 # > 1 for grpo
+    val_kwargs:
+      do_sample: True

 critic:
  strategy: fsdp
@@ -151,6 +154,7 @@ algorithm:
    kl_coef: 0.001

 trainer:
+  balance_batch: True
  total_epochs: 30
  total_training_steps: null
  project_name: verl_examples