diff --git a/environments/eval_environments/verifiers_eval.py b/environments/eval_environments/verifiers_eval.py index 61e17656..6d7c3755 100644 --- a/environments/eval_environments/verifiers_eval.py +++ b/environments/eval_environments/verifiers_eval.py @@ -11,7 +11,7 @@ To install a Verifiers/Prime environment: Docs: https://docs.primeintellect.ai/tutorials-environments/install Usage: - python verifiers_evaluation.py evaluate \ + python verifiers_eval.py evaluate \ --env.vf_env_name primeintellect/gsm8k \ --openai.model_name gpt-4.1-nano \ --openai.api_key $OPENAI_API_KEY @@ -235,12 +235,12 @@ class VerifiersEvaluationEnv(BaseEnv): async def evaluate(self, *args, **kwargs) -> Dict: """Run the full evaluation.""" - print(f"\n{'='*60}") + print(f"\n{'=' * 60}") print(f"Starting Verifiers Evaluation: {self.config.vf_env_name}") - print(f"{'='*60}") + print(f"{'=' * 60}") print(f" Total questions: {len(self.eval_items)}") print(f" Temperature: {self.config.temperature}") - print(f"{'='*60}\n") + print(f"{'=' * 60}\n") start_time = time.time() @@ -286,9 +286,9 @@ class VerifiersEvaluationEnv(BaseEnv): } # Print results - print(f"\n{'='*60}") + print(f"\n{'=' * 60}") print("Verifiers Evaluation Results") - print(f"{'='*60}") + print(f"{'=' * 60}") print(f" Average Score: {avg_score:.4f}") print(f" Accuracy: {accuracy:.2%} ({correct}/{total})") print(f" Time: {end_time - start_time:.1f}s") @@ -297,7 +297,7 @@ class VerifiersEvaluationEnv(BaseEnv): print( f" {name}: avg={data['avg']:.4f}, correct={data['correct']}/{total}" ) - print(f"{'='*60}\n") + print(f"{'=' * 60}\n") # Log to evaluate_log samples = [ diff --git a/environments/verifiers_server.py b/environments/verifiers_server.py index 873cf266..e7216919 100644 --- a/environments/verifiers_server.py +++ b/environments/verifiers_server.py @@ -1,15 +1,15 @@ -# Verifiers Training Environment for Atropos -# -# NOTE: This environment requires a LOCAL inference server (vLLM, SGLang, TRL) -# for ALL modes (serve, process, evaluate) because it uses ManagedServer for -# token/logprob tracking. For evaluation with OpenAI API, use: -# environments/eval_environments/verifiers_eval.py -# -# To install a Verifiers/Prime environment: -# 1. uv tool install prime -# 2. prime login -# 3. prime env install will/wordle (or any owner/environment) -# Docs: https://docs.primeintellect.ai/tutorials-environments/install +""" +Verifiers Training Environment for Atropos +NOTE: This environment requires a LOCAL inference server (vLLM, SGLang, TRL) +for ALL modes (serve, process, evaluate) because it uses ManagedServer for +token/logprob tracking. For evaluation with OpenAI API, use: `environments/eval_environments/verifiers_eval.py` + +To install a Verifiers/Prime environment: +1. uv tool install prime +2. prime login +3. prime env install will/wordle (or any owner/environment) +Docs: https://docs.primeintellect.ai/tutorials-environments/install +""" import time from typing import Any, Dict, List, Optional, Tuple