mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-27 17:23:19 +00:00
Added training and evaluation curriculum configs
This commit is contained in:
parent
d9cd20c174
commit
3393d22611
12 changed files with 1384 additions and 4 deletions
25
training/evaluations/curriculum/count_primes.yaml
Normal file
25
training/evaluations/curriculum/count_primes.yaml
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
# Evaluation config for the count_primes dataset.
# Exactly one model_path should be active; the commented lines are
# alternative checkpoints that can be swapped in.
model_path: Qwen/Qwen2.5-3B-Instruct  # Default model path
# NOTE(review): "count_prime_curriculum" (singular) vs
# "count_primes_non_curriculum" (plural) - confirm both repo names.
# model_path: joesharratt29/count_prime_curriculum
# model_path: joesharratt29/count_primes_non_curriculum

# Generation settings.
max_tokens: 2048  # From max_response_length in training config
top_p: 1.0
temperature: 1.0

# Prompt settings.
developer_prompt: DeepSeekZero
developer_role: system

# Output settings.
output_dir: results
save_metadata: true
save_full_results: true
eval_repeats: 3

# Datasets to evaluate, grouped by category.
categories:
  - category: algorithmic
    datasets:
      - dataset: count_primes
        size: 100
        seed: 42
        params:
          min_n: 100
          max_n: 5000
|
||||
25
training/evaluations/curriculum/mini_sudoku.yaml
Normal file
25
training/evaluations/curriculum/mini_sudoku.yaml
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
# Evaluation config for the mini_sudoku dataset.
# Exactly one model_path should be active; the commented lines are
# alternative checkpoints that can be swapped in.
# NOTE(review): the active path is the non-curriculum checkpoint, while the
# count_primes and spell_backward configs default to Qwen/Qwen2.5-3B-Instruct -
# confirm this is intentional.
model_path: joesharratt29/mini_sudoku_non_curriculum  # Currently selected model path
# model_path: joesharratt29/mini_sudoku_non_curriculum
# model_path: joesharratt29/mini_sudoku_curriculum

# Generation settings.
max_tokens: 2048  # From max_response_length in training config
top_p: 1.0
temperature: 1.0

# Prompt settings.
developer_prompt: DeepSeekZero
developer_role: system

# Output settings.
output_dir: results
save_metadata: true
save_full_results: true
eval_repeats: 3

# Datasets to evaluate, grouped by category.
categories:
  - category: algorithmic
    datasets:
      - dataset: mini_sudoku
        size: 100
        seed: 42
        params:
          min_empty: 4
          max_empty: 12
|
||||
26
training/evaluations/curriculum/spell_backward.yaml
Normal file
26
training/evaluations/curriculum/spell_backward.yaml
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
# Evaluation config for the spell_backward dataset.
# Exactly one model_path should be active; the commented lines are
# alternative checkpoints that can be swapped in.
model_path: Qwen/Qwen2.5-3B-Instruct  # Default model path
# model_path: joesharratt29/spell_backward_non_curriculum
# model_path: joesharratt29/spell_backward_curriculum

# Generation settings.
max_tokens: 2048  # From max_response_length in training config
top_p: 1.0
temperature: 1.0

# Prompt settings.
developer_prompt: DeepSeekZero
developer_role: system

# Output settings.
output_dir: results
save_metadata: true
save_full_results: true
eval_repeats: 3

# Datasets to evaluate, grouped by category.
categories:
  - category: algorithmic
    datasets:
      - dataset: spell_backward
        size: 100
        seed: 42
        params:
          min_word_len: 3  # Minimum word length
          max_word_len: 10
          data_file: holdout_words.txt
|
||||
Loading…
Add table
Add a link
Reference in a new issue