major refactor

2026-04-22 16:48:57 +00:00 · 2026-01-22 11:38:10 -05:00 · 2026-01-22 11:38:10 -05:00 · 6833d4d820
commit 6833d4d820
parent 119721ef3d
13 changed files with 3268 additions and 3423 deletions
--- a/example_trainer/__init__.py
+++ b/example_trainer/__init__.py
@@ -1,7 +1,34 @@
 """
-Example trainer implementations of how to implement a trainer for the Atropos library.
+GRPO (Group Relative Policy Optimization) Trainer
+
+A training framework for fine-tuning language models with reinforcement learning,
+designed to work with the Atropos environment system.
+
+Supports three training modes:
+- Legacy: Checkpoint-based training with vLLM restarts
+- Shared vLLM: Single-copy mode with CUDA IPC (no model duplication!)
+- LoRA: Adapter-only training with hot-swap capability
+
+Usage:
+    # As CLI
+    python -m example_trainer.grpo --model-name Qwen/Qwen2.5-3B-Instruct --training-steps 100
+
+    # As library
+    from example_trainer import TrainingConfig, train_legacy, train_shared_vllm, train_lora
+
+    config = TrainingConfig(model_name="Qwen/Qwen2.5-3B-Instruct", training_steps=100)
+    train_legacy(config)
 """

-from example_trainer.grpo import TrainingConfig, train
+from .config import TrainingConfig
+from .trainers import train_legacy, train_shared_vllm, train_lora
+from .cli import parse_args, config_from_args

-__all__ = ["TrainingConfig", "train"]
+__all__ = [
+    "TrainingConfig",
+    "train_legacy",
+    "train_shared_vllm",
+    "train_lora",
+    "parse_args",
+    "config_from_args",
+]