mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-22 16:48:57 +00:00
major refactor
This commit is contained in:
parent
119721ef3d
commit
6833d4d820
13 changed files with 3268 additions and 3423 deletions
|
|
@@ -1,7 +1,34 @@
 """
-Example trainer implementations of how to implement a trainer for the Atropos library.
+GRPO (Group Relative Policy Optimization) Trainer
+
+A training framework for fine-tuning language models with reinforcement learning,
+designed to work with the Atropos environment system.
+
+Supports three training modes:
+- Legacy: Checkpoint-based training with vLLM restarts
+- Shared vLLM: Single-copy mode with CUDA IPC (no model duplication!)
+- LoRA: Adapter-only training with hot-swap capability
+
+Usage:
+    # As CLI
+    python -m example_trainer.grpo --model-name Qwen/Qwen2.5-3B-Instruct --training-steps 100
+
+    # As library
+    from example_trainer import TrainingConfig, train_legacy, train_shared_vllm, train_lora
+
+    config = TrainingConfig(model_name="Qwen/Qwen2.5-3B-Instruct", training_steps=100)
+    train_legacy(config)
 """
 
-from example_trainer.grpo import TrainingConfig, train
+from .config import TrainingConfig
+from .trainers import train_legacy, train_shared_vllm, train_lora
+from .cli import parse_args, config_from_args
 
-__all__ = ["TrainingConfig", "train"]
+__all__ = [
+    "TrainingConfig",
+    "train_legacy",
+    "train_shared_vllm",
+    "train_lora",
+    "parse_args",
+    "config_from_args",
+]
Loading…
Add table
Add a link
Reference in a new issue