InternBootcamp/examples/xpuyu_usage/bootcamp_rl/configs/example_training_config.py
2025-05-28 14:08:37 +08:00

70 lines
1.3 KiB
Python
Executable file
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Auto-generated training configuration: flat module-level settings.

# ---- Model ----
actor = "actor_model_path"
reference = "actor_model_path"  # set to the same value as `actor`
chat_template = "r1"
dtype = "auto"
selective_recompute = 1.0
cpu_offload = False
cuda_graph = True
tp_size = 4
sp_size = 1

# ---- Dataset ----
datasets = "your dataset path"
data_difficulty_balance_cfg = None
num_workers = 0

# ---- Generation ----
gen_global_batch = 128
gen_max_new = 8192
gen_max_length = 10240
gen_top_k = 0
gen_top_p = 0.9
temperature = 1
gen_do_sample = True
max_prefill_batch = 8
prompt_repeat_k = 4

# ---- Optimizer ----
rl_global_batch = 128
rl_micro_batch = 4
rl_mini_batch_steps = 1
warmup_steps = 0
actor_lr = 1e-6
actor_min_lr = 1e-6
wd = 0.01
max_grad_norm = 1

# ---- General ----
work_dir = "examples/xpuyu_usage/ckpts/experiment_name"
checkpoint_interval = 40
log_interval = 1
seed = 0
debug = True

# ---- Reward ----
reward_shaping_type = "grpo"
loss_type = "per_token"
kl_coef = 0.01
# NOTE(review): this literal contains non-ASCII characters that resemble
# underscores; presumably it must match the tokenizer's end-of-sequence
# token exactly -- confirm against the model's tokenizer before editing.
stop_word = "<end▁of▁sentence>"

# Judger registry: one entry, keyed by judger name, holding that judger's
# settings. The judger shares the module-level `stop_word`.
judgers_config = {
    "bootcamp_judger": {
        "stop_word": stop_word,
        "num_processes": 8,
        "concurrency_per_proc": (8, 8),
    },
}
import json

# Build the mapping from data source to judger names by scanning a JSON Lines
# file. Each non-blank line must decode to an object exposing
# item["metadata"]["data_source"]; every data source found is mapped to the
# single-element list ["bootcamp_judger"] (the key used in `judgers_config`).
# Raises at import time if the file is missing (FileNotFoundError), a line is
# not valid JSON (json.JSONDecodeError), or an expected key is absent (KeyError).
# NOTE(review): "dataset_path" is a string literal, not the `datasets`
# variable -- presumably a placeholder to be replaced with the real file path.
data_judger_mapping = {}
# Explicit UTF-8 so decoding does not depend on the platform's locale default.
with open("dataset_path", "r", encoding="utf-8") as f:
    for line in f:
        # Skip blank lines (e.g. a trailing empty line) instead of letting
        # json.loads fail on them.
        if not line.strip():
            continue
        item = json.loads(line)
        data_judger_mapping[item["metadata"]["data_source"]] = ["bootcamp_judger"]