mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00
add first example with OpenRLHF
This commit is contained in:
parent
458057ba18
commit
cc0312e446
7 changed files with 815 additions and 2 deletions
|
|
@ -49,3 +49,17 @@ class ProceduralDataset(ABC, Sized, Iterable[Dict[str, Any]]):
|
|||
- metadata: dict
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
    """Score a candidate answer against the entry's single oracle answer.

    Overwrite this method in derived classes if a single oracle answer is
    not available.

    Args:
        answer: The candidate answer, or ``None`` if no answer was produced.
        entry: A dataset item; its ``"answer"`` value is used as the oracle.

    Returns:
        ``1.0`` if ``answer`` equals the oracle answer, ``0.5`` if the oracle
        answer occurs inside ``answer``, ``0.01`` for any other non-``None``
        answer, and ``0.0`` when ``answer`` is ``None``.

    Raises:
        KeyError: If ``entry`` has no ``"answer"`` key.
    """
    # Looked up before the None check so a malformed entry always fails loudly.
    oracle_answer = entry["answer"]

    if answer is None:
        return 0.0
    if answer == oracle_answer:
        return 1.0
    if oracle_answer in answer:
        # Partial credit: the oracle answer is embedded in a longer response.
        return 0.5
    # Small non-zero reward for producing any answer at all.
    return 0.01
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue