Ignore a single whitespace character at the beginning and end of the extracted answer; when the answer merely contains the oracle answer, use reward = len(oracle_answer) / len(answer)

This commit is contained in:
Andreas Koepf 2025-02-14 15:40:12 +01:00
parent 979b6ba4ef
commit 0a660a3409
3 changed files with 19 additions and 2 deletions

View file

@@ -59,7 +59,7 @@ class ProceduralDataset(ABC, Sized, Iterable[Dict[str, Any]]):
if answer == oracle_answer:
reward = 1.0
elif oracle_answer in answer:
-reward = 0.5
+reward = len(oracle_answer) / len(answer)
else:
reward = 0.01

View file

@@ -17,7 +17,7 @@ Once you have thought about the reasoning process, provide the answer in the fol
def extract_answer(completion: str, tag_name: str = "answer") -> Optional[str]:
-regex = f"<{tag_name}>(.*?)</{tag_name}>"
+regex = f"<{tag_name}>\\s?(.*?)\\s?</{tag_name}>"
matches = list(
re.finditer(
regex,