Add R1 evaluation logic

This commit is contained in:
joesharratt1229 2025-02-11 03:46:56 +00:00
parent 0657222a8f
commit 42e02640a3
6 changed files with 208 additions and 0 deletions

25
eval/r1/eval_config.py Normal file
View file

@ -0,0 +1,25 @@
from dataclasses import dataclass
from typing import List, Union
import yaml
from reasoning_gym.utils import SYSTEM_PROMPTS
@dataclass
class EvalConfig:
    """Configuration for an R1 evaluation run, loadable from a YAML file.

    Required fields (no defaults) must be present in the YAML; the model/
    provider/prompt fields fall back to the DeepSeek R1 defaults below.
    """

    category: str  # dataset category being evaluated
    datasets: Union[str, List[str]]  # single dataset name or a list of names
    eval_dir: str  # directory where evaluation outputs are written
    dataset_size: int  # number of examples to generate per dataset
    dataset_seed: int  # RNG seed for dataset generation (reproducibility)
    model: str = "deepseek/deepseek-r1"
    provider: str = "Nebius"
    developer_role: str = "system"  # chat role used for the developer prompt
    developer_prompt: str = SYSTEM_PROMPTS["DeepSeekZero"]

    @classmethod
    def from_yaml(cls, yaml_path: str) -> "EvalConfig":
        """Build an EvalConfig from a YAML mapping at *yaml_path*.

        The YAML keys must match the dataclass field names; unknown keys
        raise TypeError via the dataclass constructor.
        """
        # Explicit UTF-8 so config parsing doesn't depend on the
        # platform's default locale encoding.
        with open(yaml_path, "r", encoding="utf-8") as f:
            config = yaml.safe_load(f)
        return cls(**config)