fix bugs for symbolic regression bootcamp

2026-04-23 16:55:02 +00:00 · 2025-06-16 17:31:52 +08:00 · 2025-06-16 17:31:52 +08:00 · 5eb513f014
commit 5eb513f014
parent 33101ef068
11 changed files with 148 additions and 64 deletions
--- a/internbootcamp/bootcamp/base.py
+++ b/internbootcamp/bootcamp/base.py
@@ -47,7 +47,7 @@ class Basebootcamp:
    

    @classmethod
-    def verify_score(cls, model_output, identity: dict, format_score=0, short_penalty=False, short_threshold=256, ans_threshold=128, format_penalty=False) -> float:
+    def verify_score(cls, model_output, identity: dict, format_score=0, short_penalty=False, short_threshold=256, think_threshold=128, ans_threshold=128, format_penalty=False) -> float:
        """
        Verify the output against the ground truth.
        
@@ -83,10 +83,11 @@ class Basebootcamp:
            pass
        
        ans_output = model_output.rsplit("</think>", 1)[1] if "</think>" in model_output else ""
-        
-        if (short_penalty and len(model_output) < short_threshold) or (short_penalty and len(ans_output) < ans_threshold):
+        think_length = len(model_output) - len(ans_output)
+        score = max(0, score)  # Ensure score is non-negative
+        if (short_penalty and len(model_output) < short_threshold) or (short_penalty and len(ans_output) < ans_threshold) or (short_penalty and think_length < think_threshold):
            # if the output is too short, consider it incorrect
-            return min(score * len(model_output) / short_threshold, score * len(ans_output) / ans_threshold)
+            return min(score * len(model_output) / short_threshold, score * len(ans_output) / ans_threshold, score * think_length / think_threshold)
        
        # This for training Debug
        if random.randint(1,1024) == 1: