mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-22 16:49:06 +00:00
Merge pull request #141 from joesharratt1229/feat/score-answer-impl
Added `score_answer` implementations for `spell_backward` and `sentence_reordering`
This commit is contained in:
commit
66ddb41bbd
6 changed files with 50 additions and 8 deletions
|
|
@ -3,7 +3,7 @@
|
|||
import re
|
||||
from dataclasses import dataclass
|
||||
from random import Random
|
||||
from typing import Optional
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from ..data import read_data_file
|
||||
from ..factory import ProceduralDataset, register_dataset
|
||||
|
|
@ -92,5 +92,26 @@ class SentenceReorderingDataset(ProceduralDataset):
|
|||
"metadata": {"word_count": word_count},
|
||||
}
|
||||
|
||||
def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
    """Score a candidate sentence against the expected reordered sentence.

    Args:
        answer: The candidate answer, or ``None`` when no answer was given.
        entry: A dataset item; its ``"answer"`` key holds the expected sentence.

    Returns:
        ``1.0`` for an exact string match, ``0.0`` when ``answer`` is ``None``,
        the fraction of position-wise, case-insensitive word matches when both
        sentences have the same number of words, ``0.05`` when the word counts
        differ, and ``0.01`` when the answer cannot be compared at all
        (e.g. it is not a string, or neither side contains any word).

    Raises:
        KeyError: If ``entry`` has no ``"answer"`` key.
    """
    expected_answer = entry["answer"]
    if answer is None:
        return 0.0

    try:
        if expected_answer == answer:
            return 1.0

        goal_words = expected_answer.split()
        answer_words = answer.split()

        if len(goal_words) != len(answer_words):
            return 0.05

        if not goal_words:
            # Both sides are whitespace-only but not identical: there is
            # nothing to compare, so avoid dividing by zero below.
            return 0.01

        matches = sum(
            goal_word.lower() == answer_word.lower()
            for goal_word, answer_word in zip(goal_words, answer_words)
        )
        return matches / len(goal_words)
    except Exception:
        # Scoring is deliberately best-effort: a malformed answer yields a
        # token reward instead of crashing the caller. Unlike a bare
        # ``except``, this lets KeyboardInterrupt/SystemExit propagate.
        return 0.01
|
||||
|
||||
|
||||
register_dataset("sentence_reordering", SentenceReorderingDataset, SentenceReorderingConfig)
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
import re
|
||||
from dataclasses import dataclass
|
||||
from random import Random
|
||||
from typing import Optional
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from ..data import read_data_file
|
||||
from ..factory import ProceduralDataset, register_dataset
|
||||
|
|
@ -49,5 +49,18 @@ class SpellBackwardDataset(ProceduralDataset):
|
|||
"metadata": {"word": word, "word_len": len(word)},
|
||||
}
|
||||
|
||||
def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
    """Score a candidate answer against the expected backward spelling.

    Args:
        answer: The candidate answer, or ``None`` when no answer was given.
        entry: A dataset item; its ``"answer"`` key holds the expected string.

    Returns:
        ``1.0`` when the answer equals the expected string ignoring case,
        ``0.05`` when it differs, ``0.0`` when ``answer`` is ``None``, and
        ``0.01`` when the comparison itself fails (e.g. a non-string answer).

    Raises:
        KeyError: If ``entry`` has no ``"answer"`` key.
    """
    expected_answer = entry["answer"]
    if answer is None:
        return 0.0

    try:
        is_match = expected_answer.lower() == answer.lower()
    except Exception:
        # Scoring is deliberately best-effort: a malformed answer yields a
        # token reward instead of crashing the caller. Unlike a bare
        # ``except``, this lets KeyboardInterrupt/SystemExit propagate.
        return 0.01

    return 1.0 if is_match else 0.05
|
||||
|
||||
|
||||
register_dataset("spell_backward", SpellBackwardDataset, SpellBackwardConfig)
|
||||
|
|
|
|||
|
|
@ -8,6 +8,10 @@ from typing import Dict, List, Optional, Set, Tuple
|
|||
from ..data import get_data_file_path
|
||||
from ..factory import ProceduralDataset, register_dataset
|
||||
|
||||
# Prompt text for word-ladder questions; the `{start}` and `{end}` placeholders
# are filled in with `QUESTION_TEMPLATE.format(start=..., end=...)`.
QUESTION_TEMPLATE = """Transform the word ladder '{start}' to '{end}' by changing one letter at a time.
|
||||
Provide your answer as a comma-separated sequence of uppercase letters without spaces.
|
||||
Each step must be a valid English word."""
|
||||
|
||||
|
||||
@dataclass
|
||||
class WordLadderConfig:
|
||||
|
|
@ -211,7 +215,7 @@ class WordLadderDataset(ProceduralDataset):
|
|||
raise IndexError(f"Dataset exhausted at index {idx}. {str(e)}")
|
||||
|
||||
return {
|
||||
"question": f"Transform the word ladder '{start}' to '{end}' by changing one letter at a time.",
|
||||
"question": QUESTION_TEMPLATE.format(start=start, end=end),
|
||||
"answer": ",".join(path),
|
||||
"metadata": {"start_word": start, "end_word": end, "word_length": length, "chain_length": len(path)},
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue