# atropos/environments/optimizer/evaluator.py

from verdict.schema import Schema
from verdict import Pipeline, Layer
from verdict.common.judge import JudgeUnit
from verdict.scale import ContinuousScale
from verdict.transform import MaxPoolUnit


class OptimizerEvaluator:
    """Rate optimizer code for novelty and training impact with an LLM judge.

    The evaluator owns a single ``verdict`` pipeline that is built once in
    ``__init__``: a ``Layer`` holding the judge unit (``repeat=3``, routed via
    ``"xai/grok-3-latest"``) followed by a ``MaxPoolUnit``.
    """

    # Prompt template handed to the judge unit.
    # NOTE(review): the ``{source.*}`` placeholders are presumably filled in by
    # verdict from the fields passed to ``Schema.of`` in ``run`` -- confirm.
    _JUDGE_PROMPT = (
        "You are a judge that is an expert at evaluating optimizers for their novelty "
        "as they will be accepted to a prestigious research conference. Given the following "
        "optimizer code and its architecture/use-case, you must rate it on a scale of 1 to 10 "
        "based on how novel it is and its impactfulness in speeding up model training. "
        "Here is the code: {source.optimizer_code}\n"
        "Here is the architecture: {source.architecture}"
    )

    # Key under which the final score is looked up in the pipeline response.
    _SCORE_KEY = "Pipeline_root.block.block.unit[Map MaxPool]_score"

    def __init__(self):
        """Assemble the judge pipeline; nothing is executed until ``run``."""
        judge_layer = Layer(
            JudgeUnit(scale=ContinuousScale(1, 10)).prompt(self._JUDGE_PROMPT),
            repeat=3,
        ).via("xai/grok-3-latest")
        self.pipeline = Pipeline() >> judge_layer >> MaxPoolUnit()

    def run(self, optimizer_code: str, architecture: str) -> float:
        """Run the judge pipeline on one optimizer and return its score.

        Args:
            optimizer_code: Source code of the optimizer to be judged.
            architecture: Architecture / use-case the optimizer targets.

        Returns:
            The value stored under the max-pool score key of the pipeline
            response, or ``0.0`` when that key is missing. The judge is
            configured with ``ContinuousScale(1, 10)``, so a real score is
            presumably within that range -- TODO confirm against verdict.
        """
        schema = Schema.of(
            optimizer_code=optimizer_code,
            architecture=architecture,
        )
        # The second element returned by the pipeline is not needed here.
        response, _ = self.pipeline.run(schema)
        return self.__get_final_score(response)

    # The double-underscore (name-mangled) name is kept as-is so any existing
    # reference to ``_OptimizerEvaluator__get_final_score`` keeps working.
    def __get_final_score(self, response: dict) -> float:
        """Pull the final score out of a pipeline response mapping.

        Falls back to ``0.0`` when the expected key is absent, so a missing
        result is indistinguishable from a literal zero score.
        """
        return response.get(self._SCORE_KEY, 0.0)


if __name__ == "__main__":
    # Manual smoke run: score a toy SGD training script against an "MLP"
    # architecture label and print whatever the evaluator returns.
    sample_code = """
import torch

# Define parameter (requires_grad=True)
x = torch.tensor([0.0], requires_grad=True)
optimizer = torch.optim.SGD([x], lr=0.1)

for step in range(20):
    optimizer.zero_grad()
    loss = (x - 3) ** 2
    loss.backward()
    optimizer.step()
    print(f"Step {step + 1}: x = {x.item():.4f}, loss = {loss.item():.4f}")

print(f"\nOptimal x: {x.item():.4f}")
    """

    judge = OptimizerEvaluator()
    print(judge.run(optimizer_code=sample_code, architecture="MLP"))