mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00
* feat: Add initial server structure with configuration, registry, and middleware * feat: Add chain_sum dataset to experiment registry test * fix: Update test_registry to use DatasetSpec for composite config validation * refactor: Update Pydantic config to use json_schema_extra and ConfigDict * feat: Add Pydantic models for API request/response data * feat: Implement basic experiment management endpoints with tests * feat: Implement composite configuration endpoints for experiments * fix: Add missing DatasetConfigUpdate import in server.py * refactor: Update dataset config update method to properly merge config updates * fix: Correctly retrieve current dataset config in composite endpoint * feat: Add basic CLI structure with experiments and config commands * feat: Add initial CLI tool with basic experiment management commands * refactor: Reorganize CLI package structure and fix import paths * refactor: Implement initial CLI commands for experiment management * feat: Implement HTTP client for Reasoning Gym server in RGC CLI tool * fix: Move print statements inside try block to resolve SyntaxError * fix: Resolve SyntaxError in edit_config function by adding missing except block * feat: Add default app instance in server module for easier uvicorn startup * docs: Add README.md with server and RGC tool documentation * remove unused files * refactor: Remove unsupported type annotation in registry.py * refactor: Move ExperimentRegistry to coaching module and add Experiment class * fix: Add missing CompositeDataset import in test_registry.py * refactor: Implement lazy ASGI app creation for server initialization * feat: Add health check command to RGC CLI for server connection * feat: Add version tracking support to CompositeDataset * feat: Add DatasetVersionManager for tracking dataset versions * feat: Add entry_id metadata and score_answer_with_id method to CompositeDataset * feat: Add entry_id metadata combining version and index * fix: Resolve undefined variable by 
storing version_id before use * test: Add comprehensive unit tests for score_answer_with_id() function * test: Add comprehensive version tracking test for dataset config updates * feat: Validate dataset weights are positive in CompositeDataset initialization * feat: Add weight update and normalization methods to CompositeDataset * refactor: Centralize weight normalization in CompositeDataset and allow zero-weight datasets * feat: Add negative weight validation to CompositeDataset constructor * feat: Add duplicate dataset name check in CompositeDataset and update test * refactor: Move duplicate dataset name check inside dataset iteration loop * refactor: Update CompositeDataset weight management to use config as source of truth * refactor: Move duplicate dataset name check to CompositeConfig.validate() * test: Update composite dataset weight test assertions and validation * feat: Add methods to add and remove datasets in CompositeDataset * refactor: Remove weight normalization and use unnormalized weights directly * refactor: Remove redundant total weight check in update_dataset_weights * feat: Add batch generation and scoring endpoints to server * fix: Import BatchEntry in server.py to resolve undefined name error * refactor: Update ReasoningGymDataset to use server for batch generation and scoring * fix: Add missing List and Dict type imports * feat: Add get_batch() and score_outputs() methods to RGClient * test: Add unit tests for generate_batch and score_outputs endpoints * refactor: Add DatasetVersionManager to Experiment class and CompositeDataset constructor * feat: Add validation for base_index and batch_size in generate_batch endpoint * refactor: Remove unused BatchRequest type from imports * refactor: Convert models to use Pydantic exclusively * test: Update scoring endpoint tests to use correct request model format * refactor: Rename ScoreItem to AnswerItem and update related code * feat: Update scoring endpoint to return ordered ScoringResponse with 
scores and entry_ids * fix: Add missing ScoringResponse import in server.py * move verl ppo sample with server into own file * refactor: Use Pydantic models for get_batch() and score_outputs() in RGClient * refactor: Update client methods to use Pydantic models for type safety * refactor: Use Pydantic models for experiment and dataset config operations * refactor: Clean up duplicate methods and improve error handling in main.py * first bits of rg server use for verl * refactor: Optimize scoring with single HTTP request in _score_output * fix: Correct experiment creation with ExperimentCreate object * grpo tests with server
231 lines
8.1 KiB
Python
231 lines
8.1 KiB
Python
"""Main entry point for the Reasoning Gym CLI."""
|
|
|
|
import os
|
|
from typing import Optional
|
|
|
|
import typer
|
|
import yaml
|
|
from rich.console import Console
|
|
from rich.prompt import Confirm, Prompt
|
|
from rich.syntax import Syntax
|
|
from rich.table import Table
|
|
|
|
from tools.server.models import DatasetConfigUpdate, ExperimentCreate
|
|
|
|
# Typer application objects: two sub-command groups and the root ``rgc`` app
# they are mounted on.
experiments_app = typer.Typer(help="Manage experiments")
config_app = typer.Typer(help="Manage configurations")

app = typer.Typer(
    name="rgc",
    help="Reasoning Gym CLI - Manage and monitor reasoning gym experiments",
    add_completion=True,
)

# Expose the groups as ``rgc experiments ...`` and ``rgc config ...``.
app.add_typer(experiments_app, name="experiments")
app.add_typer(config_app, name="config")
|
|
|
|
|
|
@app.command("health")
def check_health():
    """Check server connection and health status."""
    # NOTE: the docstring above is what Typer shows as the command's help
    # text, so implementation notes are kept in comments instead.
    #
    # Outcome:
    #   * truthy ``client.check_health()``  -> green message, normal return
    #   * falsy result                      -> red message, exit code 1
    #   * the call raises                   -> red "Error connecting" message,
    #                                          exit code 1
    try:
        healthy = client.check_health()
    except Exception as e:
        console.print(f"[red]Error connecting to server: {e}[/]")
        raise typer.Exit(1)

    # The "unhealthy" exit is raised outside the ``try`` on purpose:
    # ``typer.Exit`` is an ``Exception`` subclass, so raising it inside the
    # ``try`` would also run the handler above and print a second, misleading
    # "Error connecting to server" line.
    if not healthy:
        console.print("[red]Server is not responding correctly[/]")
        raise typer.Exit(1)

    console.print("[green]Server is healthy[/]")
|
|
|
|
|
|
# Module-level singletons shared by every command in this file: the rich
# console used for all output and the client used for all server calls.
from .client import RGClient

console = Console()
client = RGClient()
|
|
|
|
|
|
@experiments_app.command("list")
def list_experiments():
    """List all registered experiments with their status."""
    # NOTE: the docstring above is what Typer shows as the command's help
    # text, so implementation notes are kept in comments instead.
    #
    # Prints one table row per experiment (name, dataset names, size, seed).
    # If the config of a single experiment cannot be fetched, a warning is
    # printed and that row is filled with "?". If the experiment list itself
    # cannot be fetched, an error is printed and the command exits with code 1.
    table = Table(title="Registered Experiments")
    for header, style in (
        ("Name", "cyan"),
        ("Datasets", "magenta"),
        ("Size", "blue"),
        ("Seed", "green"),
    ):
        table.add_column(header, style=style)

    try:
        experiments = client.list_experiments()
        for exp_name in experiments.experiments:
            try:
                config = client.get_experiment_config(exp_name)
                datasets = ", ".join(config.datasets.keys())
                # Test against None explicitly: a seed of 0 is a real seed and
                # must be displayed, not collapsed to an empty cell.
                seed_text = "" if config.seed is None else str(config.seed)
                table.add_row(exp_name, datasets, str(config.size), seed_text)
            except Exception as e:
                console.print(f"[yellow]Warning: Could not get config for {exp_name}: {e}[/]")
                table.add_row(exp_name, "?", "?", "?")
    except Exception as e:
        console.print(f"[red]Error listing experiments: {e}[/]")
        raise typer.Exit(1)

    console.print(table)
|
|
|
|
|
|
def _coerce_param_value(raw: str):
    """Convert a prompted string to ``bool``, ``int`` or ``float`` when it looks like one.

    Any other text is returned unchanged as a string. An optional leading
    ``+``/``-`` sign is accepted so that negative numbers such as ``-5`` or
    ``-0.5`` are converted as well.
    """
    lowered = raw.lower()
    if lowered in ("true", "false"):
        return lowered == "true"

    unsigned = raw[1:] if raw.startswith(("+", "-")) else raw
    try:
        if unsigned.isdigit():
            return int(raw)
        if unsigned.count(".") == 1 and unsigned.replace(".", "").isdigit():
            return float(raw)
    except ValueError:
        # ``str.isdigit`` accepts some characters (e.g. superscript digits)
        # that ``int``/``float`` reject; keep such input as a plain string.
        pass
    return raw


def _ask_number(prompt: str, default: str, cast):
    """Ask *prompt* until the answer is accepted by *cast* and return the cast value.

    Args:
        prompt: Text shown to the user.
        default: Answer used when the user just presses Enter.
        cast: Callable turning the answer string into the final value; it
            signals an unusable answer by raising ``ValueError``.
    """
    while True:
        answer = Prompt.ask(prompt, default=default)
        try:
            return cast(answer)
        except ValueError:
            console.print("[red]Please enter a valid number[/]")


def _submit_experiment(name: str, exp_config):
    """Build an ``ExperimentCreate`` from *exp_config*, send it and report the result.

    Raises:
        typer.Exit: With exit code 1 if validation or the server call fails.
    """
    try:
        config = ExperimentCreate(**exp_config)
        response = client.create_experiment(name, config)
        console.print(f"[green]Created experiment[/] [cyan]{response.name}[/]")
    except Exception as e:
        console.print(f"[red]Error creating experiment: {e}[/]")
        raise typer.Exit(1)


@experiments_app.command("create")
def create_experiment(
    name: str = typer.Argument(..., help="Name of the experiment"),
    config_file: Optional[str] = typer.Option(None, "--file", "-f", help="YAML configuration file"),
):
    """Create a new experiment."""
    # NOTE: the docstring above is what Typer shows as the command's help
    # text, so implementation notes are kept in comments instead.
    #
    # Two modes:
    #   * ``--file``: the YAML file is loaded and submitted as-is.
    #   * otherwise: size, seed, datasets and per-dataset parameters are
    #     collected interactively, shown back to the user and submitted after
    #     confirmation.
    if config_file:
        try:
            with open(config_file, "r") as f:
                exp_config = yaml.safe_load(f)
        except Exception as e:
            console.print(f"[red]Error creating experiment: {e}[/]")
            raise typer.Exit(1)
        _submit_experiment(name, exp_config)
        return

    # Interactive creation. Numeric answers are validated right away so a typo
    # re-prompts instead of crashing with a traceback later on.
    size = _ask_number("Dataset size", "500", int)
    seed = _ask_number("Random seed (optional)", "", lambda text: int(text) if text else None)

    datasets = {}
    while Confirm.ask("Add dataset?"):
        ds_name = Prompt.ask("Dataset name")
        weight = _ask_number("Weight", "1.0", float)

        # Get dataset-specific config
        console.print("\nEnter dataset configuration:")
        params = {}
        while Confirm.ask("Add config parameter?"):
            key = Prompt.ask("Parameter name")
            params[key] = _coerce_param_value(Prompt.ask("Parameter value"))

        datasets[ds_name] = {"weight": weight, "config": params}

    # Create experiment config
    exp_config = {"name": name, "size": size, "seed": seed, "datasets": datasets}

    # Show final config
    console.print("\nFinal configuration:")
    console.print(Syntax(yaml.dump(exp_config), "yaml"))

    if not Confirm.ask("Create experiment with this configuration?"):
        console.print("[yellow]Experiment creation cancelled[/]")
        raise typer.Exit()

    _submit_experiment(name, exp_config)
|
|
|
|
|
|
@experiments_app.command("delete")
def delete_experiment(
    name: str = typer.Argument(..., help="Name of the experiment to delete"),
    force: bool = typer.Option(False, "--force", "-f", help="Force deletion without confirmation"),
):
    """Delete an experiment."""
    # The confirmation prompt is only shown when --force was not given;
    # declining it ends the command with the default exit code.
    confirmed = force or Confirm.ask(f"Delete experiment [cyan]{name}[/]?")
    if not confirmed:
        raise typer.Exit()

    try:
        client.delete_experiment(name)
        console.print(f"[green]Deleted experiment[/] [cyan]{name}[/]")
    except Exception as err:
        # Any failure is reported and turned into exit code 1.
        console.print(f"[red]Error deleting experiment: {err}[/]")
        raise typer.Exit(1)
|
|
|
|
|
|
@experiments_app.command("show")
def show_experiment(
    name: str = typer.Argument(..., help="Name of the experiment"),
):
    """Show experiment details."""
    # Fetch the experiment config, dump it to YAML and print it with syntax
    # highlighting; any failure is reported and turned into exit code 1.
    try:
        exp_config = client.get_experiment_config(name)
        rendered = yaml.dump(exp_config.model_dump())
        console.print(Syntax(rendered, "yaml"))
    except Exception as err:
        console.print(f"[red]Error getting experiment config: {err}[/]")
        raise typer.Exit(1)
|
|
|
|
|
|
@config_app.command("edit")
def edit_config(
    experiment: str = typer.Argument(..., help="Name of the experiment"),
    dataset: str = typer.Argument(..., help="Name of the dataset to edit"),
):
    """Interactive configuration editor."""
    # NOTE: the docstring above is what Typer shows as the command's help
    # text, so implementation notes are kept in comments instead.
    #
    # Flow: fetch the experiment config, prompt for a new value for every
    # existing key of the dataset's "config" mapping (current value offered as
    # default), show the result and send it to the server after confirmation.
    # Exits with code 1 if the dataset is unknown, the config cannot be
    # fetched, or the update fails.
    try:
        exp_config = client.get_experiment_config(experiment)
        if dataset not in exp_config.datasets:
            console.print(f"[red]Dataset {dataset} not found in experiment[/]")
            raise typer.Exit(1)
        current_config = exp_config.datasets[dataset]["config"]

        console.print(f"\nCurrent configuration for [cyan]{dataset}[/]:")
        console.print(Syntax(yaml.dump(current_config), "yaml"))

        # Interactive editing
        new_config = {}
        for key, value in current_config.items():
            new_value = Prompt.ask(f"{key}", default=str(value), show_default=True)

            # Convert the answer back to the type of the current value.
            # ``bool`` is tested before ``int`` because ``bool`` is an ``int``
            # subclass. For booleans, any answer other than "true"
            # (case-insensitive) becomes False.
            try:
                if isinstance(value, bool):
                    new_value = new_value.lower() == "true"
                elif isinstance(value, int):
                    new_value = int(new_value)
                elif isinstance(value, float):
                    new_value = float(new_value)
            except ValueError:
                # Conversion failed: keep the raw string and tell the user.
                console.print(f"[yellow]Warning: Could not convert {new_value} to {type(value)}[/]")

            new_config[key] = new_value

        # Show changes
        console.print("\nNew configuration:")
        console.print(Syntax(yaml.dump(new_config), "yaml"))

        if Confirm.ask("Apply these changes?"):
            try:
                config_update = DatasetConfigUpdate(config=new_config)
                client.update_dataset_config(experiment, dataset, config_update)
                console.print("[green]Configuration updated successfully[/]")
            except Exception as e:
                console.print(f"[red]Error updating configuration: {e}[/]")
                raise typer.Exit(1)
        else:
            console.print("[yellow]Update cancelled[/]")

    except typer.Exit:
        # ``typer.Exit`` is an ``Exception`` subclass. Let the deliberate exits
        # raised above pass through untouched; otherwise the generic handler
        # below would print a second, misleading "Error getting experiment
        # configuration" message on top of the real one.
        raise
    except Exception as e:
        console.print(f"[red]Error getting experiment configuration: {e}[/]")
        raise typer.Exit(1)
|
|
|
|
|
|
def main():
    """Entry point for the CLI: invoke the root Typer ``app``."""
    app()
|
|
|
|
|
|
# Run the CLI when this file is executed directly instead of being imported.
if __name__ == "__main__":
    main()
|