mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-30 17:40:36 +00:00
train
This commit is contained in:
parent
7e1de80695
commit
b298a0eeb6
2 changed files with 492 additions and 0 deletions
28
configs/rubiks_training.yaml
Normal file
28
configs/rubiks_training.yaml
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
---
# Rubik's Cube Solver Training Configuration

# Flattened configuration for TrainerConfig
model_name: "NousResearch/DeepHermes-3-Llama-3-8B-Preview"  # Target model for training
learning_rate: 5.0e-6
batch_size: 8
gradient_accumulation_steps: 4
sequence_length: 2048
warmup_steps: 100

total_steps: 2000
eval_every: 50
save_every: 250
checkpoint_dir: "./rubiks_checkpoints"
use_wandb: true
wandb_project: "atropos-rubiks-cube"
wandb_run_name: "rubiks-solver-training"

# NOTE(review): machine-specific absolute path — will break on any other host;
# consider a repo-relative path. Value kept as committed.
train_file: "/Users/joshuajerin/Desktop/jarvis/atropos/environments/rubiks_process_results_22.jsonl"
validation_size: 0.1  # 10% for validation
prefer_higher_scores: true
max_samples: -1  # Use all samples

method: "GRPO"  # Group Relative Policy Optimization
temperature: 0.7
top_p: 0.9
beta: 0.1  # KL penalty coefficient
reference_model: "NousResearch/DeepHermes-3-Llama-3-3B-Preview"  # Smaller reference model
|
||||
Loading…
Add table
Add a link
Reference in a new issue