Feat/unsloth example (#482)

* cleaned up examples

* updated failing hooks

* updated readme

* corrected linting checks
This commit is contained in:
joesharratt1229 2025-06-28 17:04:38 +01:00 committed by GitHub
parent d9cd20c174
commit 1c98584f28
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
29 changed files with 122 additions and 2857 deletions

View file

@@ -1,15 +1,10 @@
# Reasoning Gym configs
dataset_size: 20000
dataset_size: 10000
developer_prompt: DeepSeekZero
developer_role: system
datasets:
simple_equations:
chain_sum:
weight: 1
complex_arithmetic:
weight: 1
config:
min_real: -20
max_real: 20
# Model configs from trl
@@ -34,9 +29,9 @@ lr_scheduler_type: constant_with_warmup
lr_scheduler_kwargs:
num_warmup_steps: 10
max_prompt_length: 512
max_completion_length: 2048
max_completion_length: 512
max_steps: 100
num_generations: 8
num_generations: 16
num_train_epochs: 1
overwrite_output_dir: true
per_device_train_batch_size: 8