Add training and evaluation curriculum configs

2026-04-27 17:23:19 +00:00 · 2025-07-28 15:51:19 +01:00 · 2025-07-28 15:51:19 +01:00 · 3393d22611
commit 3393d22611
parent d9cd20c174
12 changed files with 1384 additions and 4 deletions
--- a/training/evaluations/curriculum/count_primes.yaml
+++ b/training/evaluations/curriculum/count_primes.yaml
@ -0,0 +1,25 @@
+model_path: Qwen/Qwen2.5-3B-Instruct  # Default model path; swap in one of the commented alternatives below to evaluate it instead
+# model_path: joesharratt29/count_prime_curriculum  # NOTE(review): "count_prime" here vs "count_primes" on the next line — confirm the repo id
+# model_path: joesharratt29/count_primes_non_curriculum
+
+max_tokens: 2048  # From max_response_length in training config
+top_p: 1.0
+temperature: 1.0  # NOTE(review): presumably mirrors the training rollout temperature — confirm
+
+developer_prompt: DeepSeekZero
+developer_role: system
+
+output_dir: results
+save_metadata: true
+save_full_results: true
+eval_repeats: 3  # NOTE(review): presumably the number of repeated evaluation passes — confirm
+
+categories:
+  - category: algorithmic
+    datasets:
+      - dataset: count_primes
+        size: 100
+        seed: 42
+        params:
+          min_n: 100
+          max_n: 5000
--- a/training/evaluations/curriculum/mini_sudoku.yaml
+++ b/training/evaluations/curriculum/mini_sudoku.yaml
@ -0,0 +1,25 @@
+model_path: joesharratt29/mini_sudoku_non_curriculum  # NOTE(review): active value is the non-curriculum checkpoint and duplicates the commented line below — confirm this is the intended default
+# model_path: joesharratt29/mini_sudoku_non_curriculum
+# model_path: joesharratt29/mini_sudoku_curriculum
+
+max_tokens: 2048  # From max_response_length in training config
+top_p: 1.0
+temperature: 1.0  # NOTE(review): presumably mirrors the training rollout temperature — confirm
+
+developer_prompt: DeepSeekZero
+developer_role: system
+
+output_dir: results
+save_metadata: true
+save_full_results: true
+eval_repeats: 3  # NOTE(review): presumably the number of repeated evaluation passes — confirm
+
+categories:
+  - category: algorithmic
+    datasets:
+      - dataset: mini_sudoku
+        size: 100
+        seed: 42
+        params:
+          min_empty: 4
+          max_empty: 12
--- a/training/evaluations/curriculum/spell_backward.yaml
+++ b/training/evaluations/curriculum/spell_backward.yaml
@ -0,0 +1,26 @@
+model_path: Qwen/Qwen2.5-3B-Instruct   # Default model path; swap in one of the commented alternatives below to evaluate it instead
+# model_path: joesharratt29/spell_backward_non_curriculum
+# model_path: joesharratt29/spell_backward_curriculum
+
+max_tokens: 2048  # From max_response_length in training config
+top_p: 1.0
+temperature: 1.0  # NOTE(review): presumably mirrors the training rollout temperature — confirm
+
+developer_prompt: DeepSeekZero
+developer_role: system
+
+output_dir: results
+save_metadata: true
+save_full_results: true
+eval_repeats: 3  # NOTE(review): presumably the number of repeated evaluation passes — confirm
+
+categories:
+  - category: algorithmic
+    datasets:
+      - dataset: spell_backward
+        size: 100
+        seed: 42
+        params:
+          min_word_len: 3  # Minimum word length
+          max_word_len: 10  # Maximum word length
+          data_file: holdout_words.txt  # NOTE(review): presumably a held-out word list; confirm how this relative path is resolved