mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-23 16:55:05 +00:00
Formatting/scoring improvements for BF & family
This commit is contained in:
parent
b40c44059d
commit
0de0044d52
2 changed files with 12 additions and 4 deletions
|
|
@ -28,7 +28,8 @@ class BFDataset(ProceduralDataset):
|
|||
|
||||
def __init__(self, config: BFConfig):
|
||||
self._prompt_templates = [
|
||||
"This is a BF (Brainf*ck) computer program. What is the output? \n\n{bf_program}",
|
||||
"This is a BF (Brainf*ck) computer program. What is the output?\n\n{bf_program}\n\nRespond only with the exact output of the program.",
|
||||
"Consider the following BF (Brainf*ck) code. What would it output?\n\n{bf_program}\n\nProvide only the exact output of the code.",
|
||||
]
|
||||
super().__init__(config=config, seed=config.seed, size=config.size)
|
||||
|
||||
|
|
@ -123,6 +124,13 @@ int main() {{
|
|||
if answer == None:
|
||||
return 0.0
|
||||
if answer != entry["answer"]:
|
||||
if entry["answer"] in answer.splitlines():
|
||||
# We can be quite confident that the correct answer was given
|
||||
# It was likely just given alongside an explanation
|
||||
return 0.9 * len(answer) / len(entry["answer"])
|
||||
if entry["answer"] in answer:
|
||||
# Since answers are English words, some risk of the response coincidentally containing the answer
|
||||
return 0.5 * len(answer) / len(entry["answer"])
|
||||
return 0.01
|
||||
else:
|
||||
return 1.0 # Yay
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue