cfg

2026-04-25 17:10:51 +00:00 · 2025-04-28 23:41:37 +01:00 · 2025-04-28 23:41:37 +01:00 · 40e3293299
commit 40e3293299
parent e60e202db9
1 changed file with 18 additions and 36 deletions
--- a/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml
+++ b/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml
@@ -1,12 +1,26 @@
 reasoning_gym:
-  dataset_size: 50000
+  dataset_size: 30000
  developer_prompt: DeepSeekZero
  datasets:
-    dummy:
+    complex_arithmetic:
+      weight: 1
+    intermediate_integration:
+      weight: 1
+    polynomial_equations:
+      weight: 1
+    polynomial_multiplication:
+      weight: 1
+    advanced_geometry:
+      weight: 1
+    bitwise_arithmetic:
+      weight: 1
+    chain_sum:
+      weight: 1
+    decimal_chain_sum:
      weight: 1

 curriculum:
-    enabled: True
+    enabled: False
    schedule:
      automatic: True
      update_steps: 30 # automatic curriculum updating after 30 steps
@@ -18,38 +32,6 @@ curriculum:
        weight: 1
        attribute_levels:
          '*': 0
-      intermediate_integration:
-        weight: 1
-        attribute_levels:
-          '*': 0
-      polynomial_equations:
-        weight: 1
-        attribute_levels:
-          '*': 0
-      polynomial_multiplication:
-        weight: 1
-        attribute_levels:
-          '*': 0
-      advanced_geometry:
-        weight: 1
-        attribute_levels:
-          '*': 0
-      bitwise_arithmetic:
-        weight: 1
-        attribute_levels:
-          '*': 0
-      chain_sum:
-        weight: 1
-        attribute_levels:
-          '*': 0
-      decimal_arithmetic:
-        weight: 1
-        attribute_levels:
-          '*': 0
-      decimal_chain_sum:
-        weight: 1
-        attribute_levels:
-          '*': 0
 reward:
  use_accuracy: True
  conditional_reward: True  # Only provide a reward at all if the response format is correct
@@ -170,7 +152,7 @@ verbose: True
 trainer:
  balance_batch: True
  total_epochs: 1
-  total_training_steps: 1500
+  total_training_steps: 500
  project_name: external-generalisation
  experiment_name: math_curriculum_qwen_7b
  logger: [ 'console', 'wandb' ]