added composite changes

joesharratt1229 2025-03-25 05:38:07 +00:00
parent a8b1408967
commit fee4e37ae4


@@ -30,7 +30,7 @@ reward:
   use_accuracy: True
   secondary_rewards:
     - name: format
-      scaling_factor: 0.5
+      scaling_factor: 0.2
 data:
   tokenizer: null
@@ -39,8 +39,8 @@ data:
   prompt_key: prompt
   max_prompt_length: 512
   max_response_length: 1024
-  train_batch_size: 128
-  val_batch_size: 128
+  train_batch_size: 64
+  val_batch_size: 64
   return_raw_chat: True
   return_raw_input_ids: True
@@ -56,7 +56,7 @@ actor_rollout_ref:
     strategy: fsdp # This is for backward-compatibility
     ppo_mini_batch_size: 32
     ppo_micro_batch_size: null # will be deprecated, use ppo_micro_batch_size_per_gpu
-    ppo_micro_batch_size_per_gpu: 8
+    ppo_micro_batch_size_per_gpu: 160
     use_dynamic_bsz: False
     ppo_max_token_len_per_gpu: 12288 # n * ${data.max_prompt_length} + ${data.max_response_length}
     grad_clip: 1.0
@@ -70,9 +70,9 @@ actor_rollout_ref:
     ulysses_sequence_parallel_size: 1 # sp size
     optim:
       lr: 1e-6
-      lr_warmup_steps_ratio: 0.1 # the total steps will be injected during runtime
-      min_lr_ratio: 0.1 # only useful for warmup with cosine
-      warmup_style: cosine # select from constant/cosine
+      lr_warmup_steps_ratio: 0 # the total steps will be injected during runtime
+      min_lr_ratio: null # only useful for warmup with cosine
+      warmup_style: constant # select from constant/cosine
       total_training_steps: -1 # must be override by program
     fsdp_config:
       wrap_policy:
@@ -88,7 +88,7 @@ actor_rollout_ref:
         # transformer_layer_cls_to_wrap: None
         min_num_params: 0
     log_prob_micro_batch_size: null # will be deprecated, use log_prob_micro_batch_size_per_gpu
-    log_prob_micro_batch_size_per_gpu: 16
+    log_prob_micro_batch_size_per_gpu: 160
     log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
     log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
     ulysses_sequence_parallel_size: ${actor_rollout_ref.actor.ulysses_sequence_parallel_size} # sp size
@@ -106,7 +106,7 @@ actor_rollout_ref:
     enforce_eager: True
     free_cache_engine: True
     load_format: dummy_dtensor
-    tensor_model_parallel_size: 2
+    tensor_model_parallel_size: 4
     max_num_batched_tokens: 8192
     max_num_seqs: 1024
     log_prob_micro_batch_size: null # will be deprecated, use log_prob_micro_batch_size_per_gpu
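
For reference, the sketch below collects only the keys this commit touches, with their new values and the old values in comments. It is a hand-assembled excerpt, not the full file: the nesting under reward, data, and actor_rollout_ref (actor / ref / rollout) is inferred from the hunk headers and from the usual verl ppo_trainer.yaml layout, so treat the section placement as an assumption rather than a copy of this repo's config.

# Changed keys only (new values); nesting inferred, unchanged keys omitted.
reward:
  secondary_rewards:
    - name: format
      scaling_factor: 0.2            # was 0.5

data:
  train_batch_size: 64               # was 128
  val_batch_size: 64                 # was 128

actor_rollout_ref:
  actor:
    ppo_micro_batch_size_per_gpu: 160    # was 8
    optim:
      lr_warmup_steps_ratio: 0           # was 0.1
      min_lr_ratio: null                 # was 0.1; only used by cosine warmup
      warmup_style: constant             # was cosine
  ref:                                   # section placement inferred from hunk context
    log_prob_micro_batch_size_per_gpu: 160   # was 16
  rollout:
    tensor_model_parallel_size: 4            # was 2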