# Mirror of https://github.com/open-thought/reasoning-gym.git
# Synced 2026-04-27 17:23:19 +00:00
# Upstream file listing: 26 lines, 655 B, YAML
---
# Run configuration: model selection, sampling parameters, developer prompt,
# result-output options, and the dataset categories to evaluate.
# NOTE(review): the nesting under `categories` was reconstructed from a
# flattened copy of this file - confirm it against the consumer's schema.

model_path: Qwen/Qwen2.5-3B-Instruct  # Default model path
# Alternative model paths; swap one in for the line above to use it.
# model_path: joesharratt29/spell_backward_non_curriculum
# model_path: joesharratt29/spell_backward_curriculum

max_tokens: 2048  # From max_response_length in training config
top_p: 1.0
temperature: 1.0

developer_prompt: DeepSeekZero
developer_role: system

output_dir: results
save_metadata: true
save_full_results: true
eval_repeats: 3

categories:
  - category: algorithmic
    datasets:
      - dataset: spell_backward
        size: 100
        seed: 42
        # NOTE(review): presumably all three keys below belong to `params`;
        # verify against the dataset's accepted parameters.
        params:
          min_word_len: 3  # Minimum word length
          max_word_len: 10  # Maximum word length
          data_file: holdout_words.txt