mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-29 17:35:16 +00:00
added composite changes
This commit is contained in:
parent
4a37dbb5c1
commit
a8b1408967
1 changed file with 1 addition and 1 deletion
|
|
@@ -118,6 +118,7 @@ actor_rollout_ref:
|
|||
# for hf rollout
|
||||
do_sample: True
|
||||
use_fire_sampling: False
|
||||
max_model_len: 4096
|
||||
# number of responses (i.e. num sample times)
|
||||
n: 8 # > 1 for grpo
|
||||
val_kwargs:
|
||||
|
|
@@ -188,7 +189,6 @@ critic:
|
|||
shuffle: ${actor_rollout_ref.actor.shuffle}
|
||||
grad_clip: 1.0
|
||||
cliprange_value: 0.5
|
||||
max_model_len: 4096
|
||||
|
||||
# Reward model not used for GRPO
|
||||
reward_model:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue