mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-29 17:35:16 +00:00
added composite changes
This commit is contained in:
parent
a8b1408967
commit
fee4e37ae4
1 changed file with 9 additions and 9 deletions
|
|
@@ -30,7 +30,7 @@ reward:
|
||||||
use_accuracy: True
|
use_accuracy: True
|
||||||
secondary_rewards:
|
secondary_rewards:
|
||||||
- name: format
|
- name: format
|
||||||
scaling_factor: 0.5
|
scaling_factor: 0.2
|
||||||
|
|
||||||
data:
|
data:
|
||||||
tokenizer: null
|
tokenizer: null
|
||||||
|
|
@@ -39,8 +39,8 @@ data:
|
||||||
prompt_key: prompt
|
prompt_key: prompt
|
||||||
max_prompt_length: 512
|
max_prompt_length: 512
|
||||||
max_response_length: 1024
|
max_response_length: 1024
|
||||||
train_batch_size: 128
|
train_batch_size: 64
|
||||||
val_batch_size: 128
|
val_batch_size: 64
|
||||||
return_raw_chat: True
|
return_raw_chat: True
|
||||||
return_raw_input_ids: True
|
return_raw_input_ids: True
|
||||||
|
|
||||||
|
|
@@ -56,7 +56,7 @@ actor_rollout_ref:
|
||||||
strategy: fsdp # This is for backward-compatibility
|
strategy: fsdp # This is for backward-compatibility
|
||||||
ppo_mini_batch_size: 32
|
ppo_mini_batch_size: 32
|
||||||
ppo_micro_batch_size: null # will be deprecated, use ppo_micro_batch_size_per_gpu
|
ppo_micro_batch_size: null # will be deprecated, use ppo_micro_batch_size_per_gpu
|
||||||
ppo_micro_batch_size_per_gpu: 8
|
ppo_micro_batch_size_per_gpu: 160
|
||||||
use_dynamic_bsz: False
|
use_dynamic_bsz: False
|
||||||
ppo_max_token_len_per_gpu: 12288 # n * ${data.max_prompt_length} + ${data.max_response_length}
|
ppo_max_token_len_per_gpu: 12288 # n * ${data.max_prompt_length} + ${data.max_response_length}
|
||||||
grad_clip: 1.0
|
grad_clip: 1.0
|
||||||
|
|
@@ -70,9 +70,9 @@ actor_rollout_ref:
|
||||||
ulysses_sequence_parallel_size: 1 # sp size
|
ulysses_sequence_parallel_size: 1 # sp size
|
||||||
optim:
|
optim:
|
||||||
lr: 1e-6
|
lr: 1e-6
|
||||||
lr_warmup_steps_ratio: 0.1 # the total steps will be injected during runtime
|
lr_warmup_steps_ratio: 0 # the total steps will be injected during runtime
|
||||||
min_lr_ratio: 0.1 # only useful for warmup with cosine
|
min_lr_ratio: null # only useful for warmup with cosine
|
||||||
warmup_style: cosine # select from constant/cosine
|
warmup_style: constant # select from constant/cosine
|
||||||
total_training_steps: -1 # must be overridden by program
|
total_training_steps: -1 # must be overridden by program
|
||||||
fsdp_config:
|
fsdp_config:
|
||||||
wrap_policy:
|
wrap_policy:
|
||||||
|
|
@@ -88,7 +88,7 @@ actor_rollout_ref:
|
||||||
# transformer_layer_cls_to_wrap: None
|
# transformer_layer_cls_to_wrap: None
|
||||||
min_num_params: 0
|
min_num_params: 0
|
||||||
log_prob_micro_batch_size: null # will be deprecated, use log_prob_micro_batch_size_per_gpu
|
log_prob_micro_batch_size: null # will be deprecated, use log_prob_micro_batch_size_per_gpu
|
||||||
log_prob_micro_batch_size_per_gpu: 16
|
log_prob_micro_batch_size_per_gpu: 160
|
||||||
log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
|
log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
|
||||||
log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
|
log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
|
||||||
ulysses_sequence_parallel_size: ${actor_rollout_ref.actor.ulysses_sequence_parallel_size} # sp size
|
ulysses_sequence_parallel_size: ${actor_rollout_ref.actor.ulysses_sequence_parallel_size} # sp size
|
||||||
|
|
@@ -106,7 +106,7 @@ actor_rollout_ref:
|
||||||
enforce_eager: True
|
enforce_eager: True
|
||||||
free_cache_engine: True
|
free_cache_engine: True
|
||||||
load_format: dummy_dtensor
|
load_format: dummy_dtensor
|
||||||
tensor_model_parallel_size: 2
|
tensor_model_parallel_size: 4
|
||||||
max_num_batched_tokens: 8192
|
max_num_batched_tokens: 8192
|
||||||
max_num_seqs: 1024
|
max_num_seqs: 1024
|
||||||
log_prob_micro_batch_size: null # will be deprecated, use log_prob_micro_batch_size_per_gpu
|
log_prob_micro_batch_size: null # will be deprecated, use log_prob_micro_batch_size_per_gpu
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue