mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-22 16:49:06 +00:00
Fix chain sum veRL example for latest veRL (#371)
* fixes for latest verl * add balance_batch config * 1 -> 2 gpu * tweaks * also add raw ids to server script
This commit is contained in:
parent
8a0cacc054
commit
bd13b1b92a
5 changed files with 11 additions and 1 deletions
|
|
@@ -83,8 +83,11 @@ actor_rollout_ref:
|
|||
enable_chunked_prefill: True # could get higher throughput
|
||||
# for hf rollout
|
||||
do_sample: True
|
||||
use_fire_sampling: False
|
||||
# number of responses (i.e. num sample times)
|
||||
n: 16 # > 1 for grpo
|
||||
val_kwargs:
|
||||
do_sample: True
|
||||
|
||||
critic:
|
||||
strategy: fsdp
|
||||
|
|
@@ -151,6 +154,7 @@ algorithm:
|
|||
kl_coef: 0.001
|
||||
|
||||
trainer:
|
||||
balance_batch: True
|
||||
total_epochs: 30
|
||||
total_training_steps: null
|
||||
project_name: verl_examples
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue