reasoning-gym/tools/cli/rgc/main.py
Andreas Köpf e2702092f4
reasoning-gym-server & cli tool (#154)
* feat: Add initial server structure with configuration, registry, and middleware

* feat: Add chain_sum dataset to experiment registry test

* fix: Update test_registry to use DatasetSpec for composite config validation

* refactor: Update Pydantic config to use json_schema_extra and ConfigDict

* feat: Add Pydantic models for API request/response data

* feat: Implement basic experiment management endpoints with tests

* feat: Implement composite configuration endpoints for experiments

* fix: Add missing DatasetConfigUpdate import in server.py

* refactor: Update dataset config update method to properly merge config updates

* fix: Correctly retrieve current dataset config in composite endpoint

* feat: Add basic CLI structure with experiments and config commands

* feat: Add initial CLI tool with basic experiment management commands

* refactor: Reorganize CLI package structure and fix import paths

* refactor: Implement initial CLI commands for experiment management

* feat: Implement HTTP client for Reasoning Gym server in RGC CLI tool

* fix: Move print statements inside try block to resolve SyntaxError

* fix: Resolve SyntaxError in edit_config function by adding missing except block

* feat: Add default app instance in server module for easier uvicorn startup

* docs: Add README.md with server and RGC tool documentation

* remove unused files

* refactor: Remove unsupported type annotation in registry.py

* refactor: Move ExperimentRegistry to coaching module and add Experiment class

* fix: Add missing CompositeDataset import in test_registry.py

* refactor: Implement lazy ASGI app creation for server initialization

* feat: Add health check command to RGC CLI for server connection

* feat: Add version tracking support to CompositeDataset

* feat: Add DatasetVersionManager for tracking dataset versions

* feat: Add entry_id metadata and score_answer_with_id method to CompositeDataset

* feat: Add entry_id metadata combining version and index

* fix: Resolve undefined variable by storing version_id before use

* test: Add comprehensive unit tests for score_answer_with_id() function

* test: Add comprehensive version tracking test for dataset config updates

* feat: Validate dataset weights are positive in CompositeDataset initialization

* feat: Add weight update and normalization methods to CompositeDataset

* refactor: Centralize weight normalization in CompositeDataset and allow zero-weight datasets

* feat: Add negative weight validation to CompositeDataset constructor

* feat: Add duplicate dataset name check in CompositeDataset and update test

* refactor: Move duplicate dataset name check inside dataset iteration loop

* refactor: Update CompositeDataset weight management to use config as source of truth

* refactor: Move duplicate dataset name check to CompositeConfig.validate()

* test: Update composite dataset weight test assertions and validation

* feat: Add methods to add and remove datasets in CompositeDataset

* refactor: Remove weight normalization and use unnormalized weights directly

* refactor: Remove redundant total weight check in update_dataset_weights

* feat: Add batch generation and scoring endpoints to server

* fix: Import BatchEntry in server.py to resolve undefined name error

* refactor: Update ReasoningGymDataset to use server for batch generation and scoring

* fix: Add missing List and Dict type imports

* feat: Add get_batch() and score_outputs() methods to RGClient

* test: Add unit tests for generate_batch and score_outputs endpoints

* refactor: Add DatasetVersionManager to Experiment class and CompositeDataset constructor

* feat: Add validation for base_index and batch_size in generate_batch endpoint

* refactor: Remove unused BatchRequest type from imports

* refactor: Convert models to use Pydantic exclusively

* test: Update scoring endpoint tests to use correct request model format

* refactor: Rename ScoreItem to AnswerItem and update related code

* feat: Update scoring endpoint to return ordered ScoringResponse with scores and entry_ids

* fix: Add missing ScoringResponse import in server.py

* Move verl PPO sample with server into its own file

* refactor: Use Pydantic models for get_batch() and score_outputs() in RGClient

* refactor: Update client methods to use Pydantic models for type safety

* refactor: Use Pydantic models for experiment and dataset config operations

* refactor: Clean up duplicate methods and improve error handling in main.py

* First bits of reasoning-gym server use for verl

* refactor: Optimize scoring with single HTTP request in _score_output

* fix: Correct experiment creation with ExperimentCreate object

* grpo tests with server
2025-02-19 22:41:33 +01:00

231 lines
8.1 KiB
Python

"""Main entry point for the Reasoning Gym CLI."""
import os
from typing import Optional
import typer
import yaml
from rich.console import Console
from rich.prompt import Confirm, Prompt
from rich.syntax import Syntax
from rich.table import Table
from tools.server.models import DatasetConfigUpdate, ExperimentCreate
# Initialize Typer apps
# Root application: ``rgc`` with shell completion enabled.
app = typer.Typer(
    name="rgc",
    help="Reasoning Gym CLI - Manage and monitor reasoning gym experiments",
    add_completion=True,
)
# Sub-applications for command groups.
experiments_app = typer.Typer(help="Manage experiments")
config_app = typer.Typer(help="Manage configurations")
# Mount the groups so commands are invoked as ``rgc experiments ...`` and
# ``rgc config ...``.
app.add_typer(experiments_app, name="experiments")
app.add_typer(config_app, name="config")
@app.command("health")
def check_health():
"""Check server connection and health status."""
try:
if client.check_health():
console.print("[green]Server is healthy[/]")
else:
console.print("[red]Server is not responding correctly[/]")
raise typer.Exit(1)
except Exception as e:
console.print(f"[red]Error connecting to server: {e}[/]")
raise typer.Exit(1)
# Initialize client and console
# NOTE(review): both are created at import time; the commands defined above
# reference them only at call time, so the definition order is safe.
from .client import RGClient

client = RGClient()  # HTTP client wrapper for the Reasoning Gym server API
console = Console()  # shared Rich console used by every command for output
@experiments_app.command("list")
def list_experiments():
"""List all registered experiments with their status."""
table = Table(title="Registered Experiments")
table.add_column("Name", style="cyan")
table.add_column("Datasets", style="magenta")
table.add_column("Size", style="blue")
table.add_column("Seed", style="green")
try:
experiments = client.list_experiments()
for exp_name in experiments.experiments:
try:
config = client.get_experiment_config(exp_name)
datasets = ", ".join(config.datasets.keys())
table.add_row(exp_name, datasets, str(config.size), str(config.seed or ""))
except Exception as e:
console.print(f"[yellow]Warning: Could not get config for {exp_name}: {e}[/]")
table.add_row(exp_name, "?", "?", "?")
except Exception as e:
console.print(f"[red]Error listing experiments: {e}[/]")
raise typer.Exit(1)
console.print(table)
@experiments_app.command("create")
def create_experiment(
name: str = typer.Argument(..., help="Name of the experiment"),
config_file: Optional[str] = typer.Option(None, "--file", "-f", help="YAML configuration file"),
):
"""Create a new experiment."""
if config_file:
try:
with open(config_file, "r") as f:
exp_config = yaml.safe_load(f)
config = ExperimentCreate(**exp_config)
response = client.create_experiment(name, config)
console.print(f"[green]Created experiment[/] [cyan]{response.name}[/]")
except Exception as e:
console.print(f"[red]Error creating experiment: {e}[/]")
raise typer.Exit(1)
else:
# Interactive creation
size = Prompt.ask("Dataset size", default="500")
seed = Prompt.ask("Random seed (optional)", default="")
datasets = {}
while Confirm.ask("Add dataset?"):
ds_name = Prompt.ask("Dataset name")
weight = float(Prompt.ask("Weight", default="1.0"))
# Get dataset-specific config
console.print("\nEnter dataset configuration:")
config = {}
while Confirm.ask("Add config parameter?"):
key = Prompt.ask("Parameter name")
value = Prompt.ask("Parameter value")
try:
# Try to convert to appropriate type
if value.isdigit():
value = int(value)
elif value.lower() in ("true", "false"):
value = value.lower() == "true"
elif "." in value and value.replace(".", "").isdigit():
value = float(value)
except ValueError:
pass
config[key] = value
datasets[ds_name] = {"weight": weight, "config": config}
# Create experiment config
exp_config = {"name": name, "size": int(size), "seed": int(seed) if seed else None, "datasets": datasets}
# Show final config
console.print("\nFinal configuration:")
console.print(Syntax(yaml.dump(exp_config), "yaml"))
if Confirm.ask("Create experiment with this configuration?"):
try:
config = ExperimentCreate(**exp_config)
response = client.create_experiment(name, config)
console.print(f"[green]Created experiment[/] [cyan]{response.name}[/]")
except Exception as e:
console.print(f"[red]Error creating experiment: {e}[/]")
raise typer.Exit(1)
else:
console.print("[yellow]Experiment creation cancelled[/]")
raise typer.Exit()
@experiments_app.command("delete")
def delete_experiment(
name: str = typer.Argument(..., help="Name of the experiment to delete"),
force: bool = typer.Option(False, "--force", "-f", help="Force deletion without confirmation"),
):
"""Delete an experiment."""
if not force and not Confirm.ask(f"Delete experiment [cyan]{name}[/]?"):
raise typer.Exit()
try:
client.delete_experiment(name)
console.print(f"[green]Deleted experiment[/] [cyan]{name}[/]")
except Exception as e:
console.print(f"[red]Error deleting experiment: {e}[/]")
raise typer.Exit(1)
@experiments_app.command("show")
def show_experiment(
name: str = typer.Argument(..., help="Name of the experiment"),
):
"""Show experiment details."""
try:
config = client.get_experiment_config(name)
console.print(Syntax(yaml.dump(config.model_dump()), "yaml"))
except Exception as e:
console.print(f"[red]Error getting experiment config: {e}[/]")
raise typer.Exit(1)
@config_app.command("edit")
def edit_config(
experiment: str = typer.Argument(..., help="Name of the experiment"),
dataset: str = typer.Argument(..., help="Name of the dataset to edit"),
):
"""Interactive configuration editor."""
try:
exp_config = client.get_experiment_config(experiment)
if dataset not in exp_config.datasets:
console.print(f"[red]Dataset {dataset} not found in experiment[/]")
raise typer.Exit(1)
current_config = exp_config.datasets[dataset]["config"]
console.print(f"\nCurrent configuration for [cyan]{dataset}[/]:")
console.print(Syntax(yaml.dump(current_config), "yaml"))
# Interactive editing
new_config = {}
for key, value in current_config.items():
new_value = Prompt.ask(f"{key}", default=str(value), show_default=True)
# Try to convert to appropriate type
try:
if isinstance(value, bool):
new_value = new_value.lower() == "true"
elif isinstance(value, int):
new_value = int(new_value)
elif isinstance(value, float):
new_value = float(new_value)
except ValueError:
console.print(f"[yellow]Warning: Could not convert {new_value} to {type(value)}[/]")
new_config[key] = new_value
# Show changes
console.print("\nNew configuration:")
console.print(Syntax(yaml.dump(new_config), "yaml"))
if Confirm.ask("Apply these changes?"):
try:
config_update = DatasetConfigUpdate(config=new_config)
client.update_dataset_config(experiment, dataset, config_update)
console.print("[green]Configuration updated successfully[/]")
except Exception as e:
console.print(f"[red]Error updating configuration: {e}[/]")
raise typer.Exit(1)
else:
console.print("[yellow]Update cancelled[/]")
except Exception as e:
console.print(f"[red]Error getting experiment configuration: {e}[/]")
raise typer.Exit(1)
def main():
    """Entry point for the CLI; invokes the root Typer application."""
    app()
# Allow running this module directly (``python main.py``) as well as via a
# console-script entry point.
if __name__ == "__main__":
    main()