major refactor

This commit is contained in:
Jai Suphavadeeprasit 2026-01-22 11:38:10 -05:00
parent 119721ef3d
commit 6833d4d820
13 changed files with 3268 additions and 3423 deletions

View file

@ -1,7 +1,34 @@
"""
Example implementations showing how to build a trainer for the Atropos library.
GRPO (Group Relative Policy Optimization) Trainer
A training framework for fine-tuning language models with reinforcement learning,
designed to work with the Atropos environment system.
Supports three training modes:
- Legacy: Checkpoint-based training with vLLM restarts
- Shared vLLM: Single-copy mode with CUDA IPC (no model duplication!)
- LoRA: Adapter-only training with hot-swap capability
Usage:
    # As CLI
    python -m example_trainer.grpo --model-name Qwen/Qwen2.5-3B-Instruct --training-steps 100

    # As library
    from example_trainer import TrainingConfig, train_legacy, train_shared_vllm, train_lora
    config = TrainingConfig(model_name="Qwen/Qwen2.5-3B-Instruct", training_steps=100)
    train_legacy(config)
"""
from example_trainer.grpo import TrainingConfig, train
from .config import TrainingConfig
from .trainers import train_legacy, train_shared_vllm, train_lora
from .cli import parse_args, config_from_args
__all__ = ["TrainingConfig", "train"]
# Public API of the package: the configuration class imported from .config,
# the three trainer entry points imported from .trainers, and the CLI helpers
# imported from .cli.
__all__ = [
"TrainingConfig",
"train_legacy",
"train_shared_vllm",
"train_lora",
"parse_args",
"config_from_args",
]