added composite changes

This commit is contained in:
joesharratt1229 2025-03-25 05:28:44 +00:00
parent 4a37dbb5c1
commit a8b1408967

View file

@@ -118,6 +118,7 @@ actor_rollout_ref:
# for hf rollout
do_sample: True
use_fire_sampling: False
max_model_len: 4096
# number of responses (i.e. num sample times)
n: 8 # > 1 for grpo
val_kwargs:
@@ -188,7 +189,6 @@ critic:
shuffle: ${actor_rollout_ref.actor.shuffle}
grad_clip: 1.0
cliprange_value: 0.5
max_model_len: 4096
# Reward model not used for GRPO
reward_model: