# Automatically generated example config for bootcamp RL training.
# Origin: examples/xpuyu_usage/bootcamp_rl/configs/example_training_config.py
#
# Every top-level name below is a setting. String values such as
# "actor_model_path", "your dataset path" and "dataset_path" look like
# placeholders to be replaced before use (TODO confirm with the launcher docs).

import json

# Model Related Settings
actor = "actor_model_path"
reference = "actor_model_path"  # same placeholder as `actor`
chat_template = "r1"
dtype = "auto"
selective_recompute = 1.0
cpu_offload = False
cuda_graph = True
tp_size = 4
sp_size = 1

# Dataset Related Settings
datasets = "your dataset path"
data_difficulty_balance_cfg = None
num_workers = 0

# Generate Related Settings
gen_global_batch = 128
gen_max_new = 8192
gen_max_length = 10240
gen_top_k = 0
gen_top_p = 0.9
temperature = 1
gen_do_sample = True
max_prefill_batch = 8
prompt_repeat_k = 4

# Optimizer Related Settings
rl_global_batch = 128
rl_micro_batch = 4
rl_mini_batch_steps = 1
warmup_steps = 0
actor_lr = 1e-6
actor_min_lr = 1e-6  # equal to actor_lr
wd = 0.01
max_grad_norm = 1

# General Settings
work_dir = "examples/xpuyu_usage/ckpts/experiment_name"
checkpoint_interval = 40
log_interval = 1
seed = 0
debug = True

# Reward Settings
reward_shaping_type = "grpo"
loss_type = "per_token"
kl_coef = 0.01
stop_word = "<|end▁of▁sentence|>"

# Judger definitions, keyed by judger name. The single judger reuses the
# `stop_word` defined above.
judgers_config = dict(
    bootcamp_judger=dict(
        stop_word=stop_word,
        num_processes=8,
        concurrency_per_proc=(8, 8),
    ),
)

# Map every `metadata.data_source` value found in the dataset to the list of
# judger names that should score it. The file is read as JSON Lines: one JSON
# object per line, each with a nested ["metadata"]["data_source"] key.
#
# NOTE(review): this opens the literal path "dataset_path", not the `datasets`
# setting above -- confirm whether both are meant to point at the same file.
data_judger_mapping = {}

# Explicit UTF-8 so parsing does not depend on the platform's default encoding.
with open("dataset_path", "r", encoding="utf-8") as f:
    for line in f:
        # Tolerate blank lines (e.g. a trailing empty line), which would
        # otherwise make json.loads raise.
        if not line.strip():
            continue
        item = json.loads(line)
        data_judger_mapping[item["metadata"]["data_source"]] = ["bootcamp_judger"]