Feat/unsloth example (#482)

* cleaned up examples
* updated failing hooks
* updated readme
* corrected linting checks
2026-04-19 12:58:07 +00:00 · 2025-06-28 17:04:38 +01:00 · 2025-06-28 17:04:38 +01:00 · 1c98584f28
commit 1c98584f28
parent d9cd20c174
29 changed files with 122 additions and 2857 deletions
--- a/examples/trl/config/grpo.yaml
+++ b/examples/trl/config/grpo.yaml
@@ -1,15 +1,10 @@
 # Reasoning Gym configs
-dataset_size: 20000
+dataset_size: 10000
 developer_prompt: DeepSeekZero
 developer_role: system
 datasets:
-  simple_equations:
+  chain_sum:
    weight: 1
-  complex_arithmetic:
-    weight: 1
-    config:
-      min_real: -20
-      max_real: 20


 # Model configs from trl
@@ -34,9 +29,9 @@ lr_scheduler_type: constant_with_warmup
 lr_scheduler_kwargs:
  num_warmup_steps: 10
 max_prompt_length: 512
-max_completion_length: 2048
+max_completion_length: 512
 max_steps: 100
-num_generations: 8
+num_generations: 16
 num_train_epochs: 1
 overwrite_output_dir: true
 per_device_train_batch_size: 8