# Mirror of https://github.com/NousResearch/atropos.git
# Synced 2026-04-30 17:40:36 +00:00
---
# Rubik's Cube Solver Training Configuration
# Flattened configuration for TrainerConfig

# Model and optimization hyperparameters
model_name: "NousResearch/DeepHermes-3-Llama-3-8B-Preview"  # Target model for training
learning_rate: 5.0e-6
batch_size: 8
gradient_accumulation_steps: 4
sequence_length: 2048
warmup_steps: 100

# Training schedule, checkpointing, and experiment tracking
total_steps: 2000
eval_every: 50
save_every: 250
checkpoint_dir: "./rubiks_checkpoints"
use_wandb: true
wandb_project: "atropos-rubiks-cube"
wandb_run_name: "rubiks-solver-training"

# Training data
train_file: "/Users/joshuajerin/Desktop/jarvis/atropos/environments/rubiks_process_results_22.jsonl"
validation_size: 0.1  # 10% for validation
prefer_higher_scores: true
max_samples: -1  # Use all samples

# RL algorithm settings
method: "GRPO"  # Group Relative Policy Optimization
temperature: 0.7
top_p: 0.9
beta: 0.1  # KL penalty coefficient
reference_model: "NousResearch/DeepHermes-3-Llama-3-3B-Preview"  # Smaller reference model