add first example with OpenRLHF

This commit is contained in:
Andreas Koepf 2025-01-28 14:40:02 +00:00
parent 458057ba18
commit cc0312e446
7 changed files with 815 additions and 2 deletions

View file

@@ -49,3 +49,17 @@ class ProceduralDataset(ABC, Sized, Iterable[Dict[str, Any]]):
- metadata: dict
"""
raise NotImplementedError
def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
    """Score a candidate answer against the entry's single oracle answer.

    Overwrite this method in derived classes if a single oracle answer is
    not available.

    Args:
        answer: The candidate answer, or None if no answer was produced.
        entry: Dataset item; its "answer" key holds the oracle answer.
            The substring check below requires the oracle to be a string.

    Returns:
        1.0 for an exact match, 0.5 when the oracle answer occurs inside a
        longer answer, 0.01 for any other non-None answer, and 0.0 when
        answer is None.

    Raises:
        KeyError: If entry has no "answer" key.
    """
    # Looked up before the None check so a malformed entry always fails loudly.
    oracle_answer = entry["answer"]
    if answer is None:
        return 0.0
    if answer == oracle_answer:
        return 1.0
    # An empty string is contained in every string; without the explicit
    # emptiness guard any wrong answer would earn partial credit.
    if oracle_answer != "" and oracle_answer in answer:
        return 0.5
    # Small non-zero reward for having produced some answer at all.
    return 0.01