style: remove debug prints from code_debug_env scoring

2026-04-19 12:57:58 +00:00 · 2026-03-24 16:15:13 +05:30 · 2026-03-24 16:15:13 +05:30 · d84e4af213
commit d84e4af213
parent 9e727ce5ca
1 changed files with 1 additions and 18 deletions
--- a/environments/community/code_debug_env/code_debug_env.py
+++ b/environments/community/code_debug_env/code_debug_env.py
@@ -398,24 +398,12 @@ class CodeDebugEnv(BaseEnv):
            reward, is_partial = self._score_fix(generated_code, item)
            self.partial_fix_buffer.append(1 if is_partial else 0)

-            # Debug logging
-            extracted_preview = (
-                generated_code[:80] + "..." if generated_code else "(none)"
-            )
-            print(
-                f"  [{item['entry_point']}] extracted={bool(generated_code)}, "
-                f"score={reward:.2f}, partial={is_partial}, "
-                f"code_preview={extracted_preview}"
-            )
-
            tokens = rollout["tokens"]
            masks = rollout["masks"]
            logprobs = rollout["logprobs"]

            # Remove obviously bad examples (too short responses)
-            non_masked = len([1 for m in masks if m != -100])
-            if non_masked < 10:
-                print(f"    Skipping: only {non_masked} non-masked tokens")
+            if len([1 for m in masks if m != -100]) < 10:
                continue

            scores["tokens"].append(tokens)
@@ -427,7 +415,6 @@ class CodeDebugEnv(BaseEnv):
                break

        if not scores["scores"]:
-            print(f"  WARNING: No valid rollouts for {item['entry_point']}")
            return None

        for score in scores["scores"]:
@@ -458,10 +445,6 @@ class CodeDebugEnv(BaseEnv):
        # This prevents infinite retry loops in process mode while
        # maintaining meaningful relative ordering for training
        if all(scores["scores"][0] == s for s in scores["scores"]):
-            print(
-                f"  All {len(scores['scores'])} scores identical "
-                f"({scores['scores'][0]:.2f}), adding noise"
-            )
            scores["scores"] = [
                s + random.uniform(-0.01, 0.01) for s in scores["scores"]
            ]