mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-27 17:23:08 +00:00
feat: implement Elastic Environment Orchestrator (DEO) for environment microservices
This commit is contained in:
parent
c20c85256e
commit
5a1ea7d3cb
8 changed files with 478 additions and 0 deletions
79
atroposlib/cli/orchestrate.py
Normal file
79
atroposlib/cli/orchestrate.py
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
import argparse
|
||||
import time
|
||||
import logging
|
||||
import signal
|
||||
import sys
|
||||
from atroposlib.orchestration.metrics import MetricsCollector
|
||||
from atroposlib.orchestration.controller import ScalingController
|
||||
from atroposlib.orchestration.strategy import LocalActor
|
||||
|
||||
# Setup logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s [%(levelname)s] %(name)s: %(message)s'
|
||||
)
|
||||
logger = logging.getLogger("DEO")
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Atropos Elastic Orchestrator (DEO)")
|
||||
parser.add_argument("--server-url", type=str, default="http://localhost:8000", help="Atropos server URL")
|
||||
parser.add_argument("--env-command", type=str, required=True, help="Command to launch environment server")
|
||||
parser.add_argument("--min-actors", type=int, default=1, help="Min environment actors")
|
||||
parser.add_argument("--max-actors", type=int, default=20, help="Max environment actors")
|
||||
parser.add_argument("--target-pressure", type=float, default=1.0, help="Target Rollout Pressure (Queue/BatchSize)")
|
||||
parser.add_argument("--poll-interval", type=int, default=10, help="Poll interval in seconds")
|
||||
parser.add_argument("--cooldown", type=int, default=60, help="Scaling cooldown in seconds")
|
||||
parser.add_argument("--max-step", type=int, default=4, help="Max actors to add/remove at once")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# 1. Initialize metrics collector
|
||||
collector = MetricsCollector(args.server_url)
|
||||
|
||||
# 2. Initialize Scaling Controller
|
||||
controller = ScalingController(
|
||||
min_actors=args.min_actors,
|
||||
max_actors=args.max_actors,
|
||||
target_pressure=args.target_pressure,
|
||||
cooldown_seconds=args.cooldown,
|
||||
max_step_change=args.max_step
|
||||
)
|
||||
|
||||
# 3. Initialize Strategy (LocalActor)
|
||||
# Convert command string to list
|
||||
env_command_list = args.env_command.split()
|
||||
actor = LocalActor(env_command_list)
|
||||
|
||||
logger.info(f"Starting DEO against {args.server_url}...")
|
||||
logger.info(f"Command: {args.env_command}")
|
||||
|
||||
# Graceful shutdown handler
|
||||
def handle_shutdown(sig, frame):
|
||||
logger.info("Shutdown signal received. Cleaning up...")
|
||||
actor.cleanup()
|
||||
sys.exit(0)
|
||||
|
||||
signal.signal(signal.SIGINT, handle_shutdown)
|
||||
signal.signal(signal.SIGTERM, handle_shutdown)
|
||||
|
||||
# 4. Main Control Loop
|
||||
try:
|
||||
while True:
|
||||
metrics = collector.poll()
|
||||
if metrics:
|
||||
current_actors = actor.get_current_count()
|
||||
target_actors = controller.calculate_desired(metrics, current_actors)
|
||||
|
||||
if target_actors != current_actors:
|
||||
actor.set_instance_count(target_actors)
|
||||
else:
|
||||
logger.warning("Could not fetch metrics. Check if Atropos server is running.")
|
||||
|
||||
time.sleep(args.poll_interval)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"DEO loop crashed: {e}")
|
||||
actor.cleanup()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue