Add R1 evaluation logic

This commit is contained in:
joesharratt1229 2025-02-11 03:46:56 +00:00
parent 0657222a8f
commit 42e02640a3
6 changed files with 208 additions and 0 deletions

25
eval/r1/eval_config.py Normal file
View file

@ -0,0 +1,25 @@
from dataclasses import dataclass
from typing import List, Union
import yaml
from reasoning_gym.utils import SYSTEM_PROMPTS
@dataclass
class EvalConfig:
    """Configuration for an R1 evaluation run, loadable from a YAML file.

    Required fields (no defaults) must be present in the YAML; the model/
    provider/prompt fields fall back to the DeepSeek R1 defaults below.
    """

    category: str  # dataset category being evaluated
    datasets: Union[str, List[str]]  # single dataset name or a list of names
    eval_dir: str  # directory where evaluation outputs are written
    dataset_size: int  # number of examples to generate per dataset
    dataset_seed: int  # RNG seed for dataset generation (reproducibility)
    model: str = "deepseek/deepseek-r1"
    provider: str = "Nebius"
    developer_role: str = "system"  # chat role used for the developer prompt
    developer_prompt: str = SYSTEM_PROMPTS["DeepSeekZero"]

    @classmethod
    def from_yaml(cls, yaml_path: str) -> "EvalConfig":
        """Build an EvalConfig from a YAML mapping at *yaml_path*.

        The YAML keys must match the dataclass field names; unknown keys
        raise TypeError via the dataclass constructor.
        """
        # Explicit UTF-8 so config parsing doesn't depend on the
        # platform's default locale encoding.
        with open(yaml_path, "r", encoding="utf-8") as f:
            config = yaml.safe_load(f)
        return cls(**config)