Fix chain sum veRL example for latest veRL (#371)

* fixes for latest verl

* add balance_batch config

* 1 -> 2 gpu

* tweaks

* also add raw ids to server script
This commit is contained in:
Oliver Stanley 2025-03-14 19:15:54 +00:00 committed by GitHub
parent 8a0cacc054
commit bd13b1b92a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 11 additions and 1 deletions

View file

@@ -83,8 +83,11 @@ actor_rollout_ref:
enable_chunked_prefill: True # could get higher throughput
# for hf rollout
do_sample: True
use_fire_sampling: False
# number of responses (i.e. num sample times)
n: 16 # > 1 for grpo
val_kwargs:
do_sample: True
critic:
strategy: fsdp
@@ -151,6 +154,7 @@ algorithm:
kl_coef: 0.001
trainer:
balance_batch: True
total_epochs: 30
total_training_steps: null
project_name: verl_examples