updated inter-domain generalisation eval configs (#432)

* tweak eval configs
* add eval configs
* add eval config
2026-04-19 12:58:07 +00:00 · 2025-05-15 08:08:16 +01:00 · 2025-05-15 08:08:16 +01:00 · 85f3c6dd02
commit 85f3c6dd02
parent 4cab1c3e6d
7 changed files with 15 additions and 8 deletions
--- a/training/evaluations/inter_generalisation/arithmetic.yaml
+++ b/training/evaluations/inter_generalisation/arithmetic.yaml
@@ -3,8 +3,9 @@
 # Models evaluated on this config:
 # Qwen/Qwen2.5-3B-Instruct (original model)
 # inter_algorithmic_qwen_3b_500 (original + 500 GRPO steps on algorithmic RG data)
+# inter_algebra_qwen_3b_500 (original + 500 GRPO steps on algebra RG data)

-model_path: ../models/inter_algorithmic_qwen_3b_500  # Change to the model to be evaluated
+model_path: ../models/inter_algebra_qwen_3b_500  # Change to the model to be evaluated

 max_tokens: 2048  # From max_response_length in training config
 top_p: 0.9  # From rollout top_p