Formatting/scoring improvements for BF & family

This commit is contained in:
Oliver 2025-02-17 19:08:15 +00:00
parent b40c44059d
commit 0de0044d52
2 changed files with 12 additions and 4 deletions

View file

@ -28,7 +28,8 @@ class BFDataset(ProceduralDataset):
def __init__(self, config: BFConfig):
self._prompt_templates = [
"This is a BF (Brainf*ck) computer program. What is the output? \n\n{bf_program}",
"This is a BF (Brainf*ck) computer program. What is the output?\n\n{bf_program}\n\nRespond only with the exact output of the program.",
"Consider the following BF (Brainf*ck) code. What would it output?\n\n{bf_program}\n\nProvide only the exact output of the code.",
]
super().__init__(config=config, seed=config.seed, size=config.size)
@ -123,6 +124,13 @@ int main() {{
if answer == None:
return 0.0
if answer != entry["answer"]:
if entry["answer"] in answer.splitlines():
# We can be quite confident that the correct answer was given
# It was likely just given alongside an explanation
return 0.9 * len(answer) / len(entry["answer"])
if entry["answer"] in answer:
# Since answers are English words, there is some risk that the response contains the answer only by coincidence
return 0.5 * len(answer) / len(entry["answer"])
return 0.01
else:
return 1.0 # Yay