diff --git a/reasoning_gym/code/bf.py b/reasoning_gym/code/bf.py
index 5ada391e..973a41a6 100644
--- a/reasoning_gym/code/bf.py
+++ b/reasoning_gym/code/bf.py
@@ -117,36 +117,6 @@ int main() {{
         # bf = Minify.minify(bf) # Is this necessary?
         return bf
 
-    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
-        """Determine if the solution provided solves the BF task.
-
-        The function awards 1.0 for a correct answer.
-
-        Args:
-            answer (Optional[str]): The user's answer.
-            entry (dict[str, Any]): The original dataset entry containing the correct answer.
-
-        Returns:
-            float: The computed score between 0.0 and 1.0.
-        """
-
-        if not isinstance(answer, str):
-            return 0.0
-
-        if answer == entry["answer"]:
-            return 1.0  # Yay
-
-        if entry["answer"] in answer.splitlines():
-            # We can be quite confident that the correct answer was given
-            # It was likely just given alongside an explanation
-            return max(0.9 * len(answer) / len(entry["answer"]), 0.1)
-
-        if entry["answer"] in answer:
-            # Since answers are English words, some risk of the response coincidentally containing the answer
-            return max(0.5 * len(answer) / len(entry["answer"]), 0.1)
-
-        return 0.0
-
 
 class BFCurriculum(BaseCurriculum):
     def __init__(self):