allow longer outputs

This commit is contained in:
Oliver 2025-03-26 21:05:37 +00:00
parent 165d1de86e
commit 678eaa5b81

View file

@@ -37,7 +37,7 @@ data:
val_files: test.parquet
prompt_key: prompt
max_prompt_length: 512
-  max_response_length: 1024
+  max_response_length: 4096
train_batch_size: 16
val_batch_size: 16
return_raw_input_ids: True # This should be set to true when the tokenizer between policy and rm differs
@@ -57,7 +57,7 @@ actor_rollout_ref:
ppo_micro_batch_size: null # will be deprecated, use ppo_micro_batch_size_per_gpu
ppo_micro_batch_size_per_gpu: 8
use_dynamic_bsz: False
-    ppo_max_token_len_per_gpu: 12288 # n * ${data.max_prompt_length} + ${data.max_response_length}
+    ppo_max_token_len_per_gpu: 36864 # n * ${data.max_prompt_length} + ${data.max_response_length}
grad_clip: 1.0
clip_ratio: 0.2
entropy_coeff: 0.001