Ignore a single whitespace character at the beginning and end of the answer; when the oracle answer is contained in the answer, use reward = len(oracle_answer) / len(answer)

This commit is contained in:
Andreas Koepf 2025-02-14 15:40:12 +01:00
parent 979b6ba4ef
commit 0a660a3409
3 changed files with 19 additions and 2 deletions

View file

@@ -59,7 +59,7 @@ class ProceduralDataset(ABC, Sized, Iterable[Dict[str, Any]]):
if answer == oracle_answer:
reward = 1.0
elif oracle_answer in answer:
-reward = 0.5
+reward = len(oracle_answer) / len(answer)
else:
reward = 0.01