Rename training/configs/external_generalisation/math_curriculum_qwen_7b.yaml
to math_qwen_7b.yaml (similarity 99%) and change entropy_coeff from 0.001 to
0.000 in the hunk located under actor_rollout_ref.

NOTE(review): the leading indentation of the hunk body lines was not
recoverable from the collapsed text; four spaces are assumed below. Confirm
against the target file before applying.

diff --git a/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml b/training/configs/external_generalisation/math_qwen_7b.yaml
similarity index 99%
rename from training/configs/external_generalisation/math_curriculum_qwen_7b.yaml
rename to training/configs/external_generalisation/math_qwen_7b.yaml
index 344722ad..1599848f 100644
--- a/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml
+++ b/training/configs/external_generalisation/math_qwen_7b.yaml
@@ -71,7 +71,7 @@ actor_rollout_ref:
     clip_ratio_low: 0.2
     clip_ratio_high: 0.2
     loss_agg_mode: "token-mean" # / "seq-mean-token-sum" / "seq-mean-token-mean"
-    entropy_coeff: 0.001
+    entropy_coeff: 0.000
     use_kl_loss: False # True for GRPO
     kl_loss_coef: 0.001 # for grpo
     kl_loss_type: low_var_kl # for grpo