atropos/example_trainer/grpo.py

#!/usr/bin/env python3
"""
GRPO Trainer - Main Entry Point

This is the command-line entry point for the GRPO trainer.
For the actual implementation, see the modular files:

- config.py      - TrainingConfig class
- api.py         - Atropos API communication
- data.py        - Data processing and batching
- model.py       - Model loading and shared memory
- training.py    - Loss computation and training step
- checkpointing.py - Checkpoint saving
- vllm_manager.py  - vLLM process management
- trainers.py    - Training mode implementations
- cli.py         - CLI argument parsing

Usage:
    # Legacy mode (checkpoint + restart)
    python grpo.py --model-name Qwen/Qwen2.5-3B-Instruct --training-steps 100

    # Single-copy mode (shared memory)
    python grpo.py --model-name Qwen/Qwen2.5-3B-Instruct --weight-bridge-mode shared_vllm

    # LoRA mode (adapter training)
    python grpo.py --model-name Qwen/Qwen2.5-3B-Instruct --weight-bridge-mode lora_only
"""

from .cli import parse_args, config_from_args
from .trainers import train_legacy, train_shared_vllm, train_lora


def main():
    """Main entry point for GRPO trainer."""
    args = parse_args()
    config = config_from_args(args)

    print(f"Weight bridge mode: {config.weight_bridge_mode}")

    if config.weight_bridge_mode == "shared_vllm":
        # Single-copy mode: attach to vLLM's weights, update in-place
        train_shared_vllm(config)

    elif config.weight_bridge_mode == "lora_only":
        # LoRA mode: freeze base model, train adapters only
        train_lora(config)

    else:
        # Legacy mode: periodic checkpoint saves + vLLM restarts
        train_legacy(config)


if __name__ == "__main__":
    main()