diff --git a/environments/optimizer/deploy.py b/environments/optimizer/deploy.py index 83debc53..99325915 100644 --- a/environments/optimizer/deploy.py +++ b/environments/optimizer/deploy.py @@ -14,7 +14,7 @@ from modal.sandbox import Sandbox # --- Modal App and Images --- APP_NAME = "optimizer-test" -SANDBOX_APP_NAME = "new_sandbox" +SANDBOX_APP_NAME = "new_sandbox_test" app = App(APP_NAME) @@ -28,9 +28,6 @@ benchmark_volume = Volume.from_name("benchmark-responses") optimizers_volume = Volume.from_name("optimizers") sys_prompt_volume = Volume.from_name("optimizerSystemPrompt") -# --- Sandbox Setup --- -sandbox_app = App.lookup(SANDBOX_APP_NAME, create_if_missing=True) -sandbox = Sandbox.create(app=sandbox_app, image=sandbox_image, timeout=60 * 60) # --- Utility Functions --- @@ -60,6 +57,10 @@ def send_code(code: str): """Send and execute optimizer code in the sandbox environment.""" filename, optimizer_code = _write_optimizer_code_to_volume(code, optimizers_volume) + # --- Sandbox Setup --- + sandbox_app = App.lookup(SANDBOX_APP_NAME, create_if_missing=True) + sandbox = Sandbox.create(app=sandbox_app, image=sandbox_image, timeout=60 * 60) + # Write code to sandbox with sandbox.open(filename, "w") as f: f.write(optimizer_code) diff --git a/environments/optimizer/evaluator.py b/environments/optimizer/evaluator.py index 6a51b517..fb70c6ba 100644 --- a/environments/optimizer/evaluator.py +++ b/environments/optimizer/evaluator.py @@ -36,7 +36,7 @@ class OptimizerEvaluator: CategoricalJudgeUnit( name='Judge', categories=DiscreteScale(['yes', 'no']), - explanation=False + explanation=False, ).prompt(""" You are an expert code validator specializing in PyTorch optimizers. Your task is to determine if the provided optimizer code is completely valid and error-free. @@ -114,12 +114,6 @@ print(f"\nOptimal x: {x.item():.4f}") stderr = """ Traceback (most recent call last): - File "optimizer.py", line 8, in - optimizer = torch.optim.SGD([x], lr=0.1) - RuntimeError: CUDA out of memory. Tried to allocate 2.00 MiB (GPU 0; 4.00 GiB total capacity; 3.47 GiB already allocated; 0 bytes free; 3.47 GiB reserved in total by PyTorch) - - Traceback (most recent call last): - File "optimizer.py", line 12, in < """ score = evaluator.check_validity(optimizer_code=optimizer_code, stdout=stdout, stderr=stderr)