diff --git a/reasoning_gym/code/bf.py b/reasoning_gym/code/bf.py
index c2697203..a8528843 100644
--- a/reasoning_gym/code/bf.py
+++ b/reasoning_gym/code/bf.py
@@ -28,7 +28,8 @@ class BFDataset(ProceduralDataset):
 
     def __init__(self, config: BFConfig):
         self._prompt_templates = [
-            "This is a BF (Brainf*ck) computer program. What is the output? \n\n{bf_program}",
+            "This is a BF (Brainf*ck) computer program. What is the output?\n\n{bf_program}\n\nRespond only with the exact output of the program.",
+            "Consider the following BF (Brainf*ck) code. What would it output?\n\n{bf_program}\n\nProvide only the exact output of the code.",
         ]
         super().__init__(config=config, seed=config.seed, size=config.size)
 
@@ -123,6 +124,13 @@ int main() {{
         if answer == None:
             return 0.0
         if answer != entry["answer"]:
+            if entry["answer"] in answer.splitlines():
+                # The correct answer sits on its own line, most likely next to an explanation.
+                # Scale by the answer's share of the response so verbose replies score lower and never reach 0.9.
+                return 0.9 * len(entry["answer"]) / len(answer)
+            if entry["answer"] in answer:
+                # Answers are English words, so the response may contain one by coincidence: cap below 0.5.
+                return 0.5 * len(entry["answer"]) / len(answer)
             return 0.01
         else:
             return 1.0  # Yay
diff --git a/reasoning_gym/graphs/family_relationships.py b/reasoning_gym/graphs/family_relationships.py
index ee278b33..d875d7a1 100644
--- a/reasoning_gym/graphs/family_relationships.py
+++ b/reasoning_gym/graphs/family_relationships.py
@@ -175,9 +175,9 @@ class FamilyRelationshipsDataset(ProceduralDataset):
 
     def __init__(self, config: FamilyRelationshipsConfig):
         self._templates = [
-            "What is {person1} to {person2}?",
-            "How is {person1} related to {person2}?",
-            "What relation is {person1} to {person2}?",
+            "What is {person1} to {person2}? Respond only with the word that describes their relationship.",
+            "How is {person1} related to {person2}? Provide the relationship in one word.",
+            "What relation is {person1} to {person2}? Answer with a single word.",
         ]
         super().__init__(config=config, seed=config.seed, size=config.size)
 