updated inter-domain generalisation eval configs (#432)

* tweak eval configs

* add eval configs

* add eval config
This commit is contained in:
Oliver Stanley 2025-05-15 08:08:16 +01:00 committed by GitHub
parent 4cab1c3e6d
commit 85f3c6dd02
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 15 additions and 8 deletions

View file

@@ -3,8 +3,9 @@
# Models evaluated on this config:
# Qwen/Qwen2.5-3B-Instruct (original model)
# inter_algorithmic_qwen_3b_500 (original + 500 GRPO steps on algorithmic RG data)
+# inter_algebra_qwen_3b_500 (original + 500 GRPO steps on algebra RG data)
-model_path: ../models/inter_algorithmic_qwen_3b_500 # Change to the model to be evaluated
+model_path: ../models/inter_algebra_qwen_3b_500 # Change to the model to be evaluated
max_tokens: 2048 # From max_response_length in training config
top_p: 0.9 # From rollout top_p