rm hardcoded same score check

Partho Das 2026-02-23 15:03:17 +05:30
parent 708b42a00f
commit f02c24204d
22 changed files with 0 additions and 85 deletions
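
Every hunk below deletes the same per-environment guard: if a rollout group's scores are all identical, the group is discarded before it reaches the trainer. A minimal sketch of the deleted pattern (the function name and bare-list signature are illustrative, not the repo's API):

    def filter_degenerate_group(scores):
        # Shape of the removed guard: a group where every rollout earned
        # the same score is dropped rather than returned to the trainer.
        if all(scores[0] == score for score in scores):
            return None
        return scores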


@@ -3653,14 +3653,6 @@ class AnswerFormatEnv(BaseEnv):
             )
         await self._save_failed_rollouts_to_jsonl()
-        # Check if all scores are the same (no learning signal)
-        if all(group_scores[0] == score for score in group_scores):
-            if self.debug_logging:
-                self.logger.debug(
-                    "All scores are identical, returning None for learning signal"
-                )
-            return None
         # Track successful groups for equivalent ratio enforcement
         if self.ensure_equivalent_ratios:
             # Count this as a successful group if we have any successful examples
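
The "no learning signal" comments assume group-relative advantages, i.e. each rollout is weighted by its score minus the group mean (GRPO-style; an assumption here, since the trainer itself is not part of this diff). Under that scheme a uniform group genuinely contributes nothing:

    group_scores = [0.7, 0.7, 0.7, 0.7]
    mean = sum(group_scores) / len(group_scores)
    advantages = [s - mean for s in group_scores]
    print(advantages)  # [0.0, 0.0, 0.0, 0.0] -> zero gradient contribution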


@@ -285,8 +285,6 @@ class GSM8kEnv(BaseEnv):
                 percentage_of_range = min(percentage_of_range, 1.0)
                 # Apply linear penalty scaling from 1.0 down to 0.0
                 scores["scores"].append(1.0 - percentage_of_range)
-            if all([scores["scores"][0] == score for score in scores["scores"]]):
-                return None  # If all the same, we return None
             return scores
         else:
             # If the gold solution is not parseable, we return None
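
The surviving context lines implement the linear length penalty: percentage_of_range (presumably the fraction by which a completion overshoots its allowed range) is clamped at 1.0 and mapped linearly from a score of 1.0 down to 0.0. For example:

    overshoot = min(1.25, 1.0)   # 125% over the range, clamped to 100%
    score = 1.0 - overshoot      # -> 0.0; an overshoot of 0.3 would score 0.7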


@@ -503,11 +503,6 @@ class DynastAIEnv(BaseEnv):
         for score in scores["scores"]:
             self.percent_correct_buffer.append(max(score, 0))
-        # Check if all the same
-        if all([score == scores["scores"][0] for score in scores["scores"]]):
-            print("[DYNASTAI] All scores identical, returning None")
-            return None  # If all the same, we return None
         return scores

     async def get_next_item(self) -> DynastAIRow:


@@ -404,10 +404,6 @@ class SingleToolCallingEnv(BaseEnv):
             # Apply linear penalty scaling from 1.0 down to 0.0
             scores["scores"].append(1.0 - percentage_of_range)
-        # Check if all scores are the same (no learning signal)
-        if all(scores["scores"][0] == score for score in scores["scores"]):
-            return None
         return scores

     async def get_next_item(self):


@@ -433,10 +433,6 @@ class OptionsIVPrediction(BaseEnv):
             if len(scores["tokens"]) >= self.config.group_size:
                 break
-        # Return None if all scores are the same (no learning signal)
-        if all(scores["scores"][0] == score for score in scores["scores"]):
-            return None
         return scores

     async def rollout_and_score_eval(self, test_item):


@@ -777,10 +777,6 @@ End your evaluation with \\boxed{{score}} where score is your numerical rating.
         if hasattr(self, "last_agent_card_feedback"):
             self.last_agent_card_feedback = agent_card_feedback
-        # Ensure we have different scores for training signal
-        if len(set(scores["scores"])) == 1:
-            return None
         return scores

     def _extract_score_from_agent_card(self, agent_card_response: str) -> float:
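
The deleted guards use two idioms interchangeably: all(xs[0] == x for x in xs) and len(set(xs)) == 1. They agree on any non-empty score list, but diverge on an empty one, which is worth noting if a group can ever end up with no scores:

    xs = []
    print(all(xs[0] == x for x in xs))  # True: empty generator, xs[0] is never evaluated
    print(len(set(xs)) == 1)            # False: the empty group would be kept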


@@ -409,9 +409,6 @@ class PhysicalEnv(BaseEnv):
             percentage_of_range = min(percentage_of_range, 1.0)
             scores["scores"].append(1.0 - percentage_of_range)
-        if all([scores["scores"][0] == score for score in scores["scores"]]):
-            return None  # If all the same, we return None
         return scores

     async def get_next_item(self) -> PhysicalRow:


@@ -248,10 +248,6 @@ class RegexEnv(BaseEnv):
                 1.0 if s >= self.config.score_threshold else 0.0
             )
-        # If all scores identical, no learning signal
-        if len(set(scores["scores"])) == 1:
-            return None
         return scores

     async def rollout_and_score_eval(self, problem: dict) -> dict:


@@ -377,8 +377,6 @@ class SolitaireEnv(BaseEnv):
                 percentage_of_range = min(percentage_of_range, 1.0)
                 # Apply linear penalty scaling from 1.0 down to 0.0
                 scores["scores"].append(1.0 - percentage_of_range)
-            if all([scores["scores"][0] == score for score in scores["scores"]]):
-                return None  # If all the same, we return None
             return scores
         else:
             # If the gold solution is not parseable, we return None


@@ -408,9 +408,6 @@ class SQLQueryEnv(BaseEnv):
             percentage_of_range = min(percentage_of_range, 1.0)
             scores["scores"].append(1.0 - percentage_of_range)
-        if all([scores["scores"][0] == score for score in scores["scores"]]):
-            return None  # If all the same, return None
         return scores

     async def get_next_item(self) -> WikiSQLRow:


@@ -404,10 +404,6 @@ class SingleToolCallingEnv(BaseEnv):
             # Apply linear penalty scaling from 1.0 down to 0.0
             scores["scores"].append(1.0 - percentage_of_range)
-        # Check if all scores are the same (no learning signal)
-        if all(scores["scores"][0] == score for score in scores["scores"]):
-            return None
         return scores

     async def get_next_item(self):


@@ -1112,10 +1112,6 @@ class PairwiseJudgementEnv(BaseEnv):
             for score in scores["scores"]:
                 self.percent_correct_buffer.append(max(score, 0))
-            # Return None if all scores are the same (no learning signal)
-            if len(set(scores["scores"])) == 1:
-                return None
             return scores
         except Exception as e:


@@ -402,10 +402,6 @@ class FundamentalPredictionEnv(BaseEnv):
             if len(scores["tokens"]) >= self.config.group_size:
                 break
-        # Return None if all scores are the same (no learning signal)
-        if all(scores["scores"][0] == score for score in scores["scores"]):
-            return None
         return scores

     async def rollout_and_score_eval(self, test_item):


@@ -352,8 +352,6 @@ class GSM8kEnv(BaseEnv):
                 percentage_of_range = min(percentage_of_range, 1.0)
                 # Apply linear penalty scaling from 1.0 down to 0.0
                 scores["scores"].append(1.0 - percentage_of_range)
-            if all([scores["scores"][0] == score for score in scores["scores"]]):
-                return None  # If all the same, we return None
             return scores
         else:
             # If the gold solution is not parseable, we return None


@@ -283,8 +283,6 @@ class GSM8kEnv(BaseEnv):
                 percentage_of_range = min(percentage_of_range, 1.0)
                 # Apply linear penalty scaling from 1.0 down to 0.0
                 scores["scores"].append(1.0 - percentage_of_range)
-            if all([scores["scores"][0] == score for score in scores["scores"]]):
-                return None  # If all the same, we return None
             return scores
         else:
             # If the gold solution is not parseable, we return None


@@ -1248,13 +1248,6 @@ class LetterCountingEnv(BaseEnv):
             )
             return None
-        # Skip if all scores are identical (no learning signal)
-        if all(scores["scores"][0] == score for score in scores["scores"]):
-            self.logger.debug(
-                f"All scores identical ({scores['scores'][0]:.2f}) - skipping group"
-            )
-            return None
         return scores

     async def rollout_and_score_eval(self, eval_item: Dict) -> Tuple[int, int]:


@@ -534,8 +534,6 @@ class MathEnv(BaseEnv):
                     return None, to_backlog
                 else:
                     return None, to_backlog
-            else:
-                return None, to_backlog
         else:
             self.normal_rollouts.append(
                 (
@@ -1167,8 +1165,6 @@ class MathEnv(BaseEnv):
                     "Max message delta is less than 0.1 * shortest message, no length penalty"
                 )
                 return None, []
-            elif all([score == scores["scores"][0] for score in scores["scores"]]):
-                return None, []
             if len(for_table) > 0:
                 self.judge_rollouts.append(for_table)
                 if len(self.judge_rollouts) >= self.config.num_rollouts_to_keep:
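
MathEnv's scoring path returns a (result, backlog) pair rather than a bare dict, so its copies of the guard returned (None, to_backlog) or (None, []) instead of None. A sketch of that variant (the function name is hypothetical; scores and to_backlog are taken from the diff):

    def guard_group(scores, to_backlog):
        # Same degenerate-group check, adapted to the tuple-returning signature.
        if all(score == scores["scores"][0] for score in scores["scores"]):
            return None, to_backlog
        return scores, to_backlog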


@@ -383,10 +383,6 @@ class MathEnv(BaseEnv):
         to_postprocess = await self.score(to_score)
         if to_postprocess is None:
             return None, to_backlog
-        if all(
-            [to_postprocess["scores"][0] == score for score in to_postprocess["scores"]]
-        ):
-            return None, to_backlog
         self.normal_rollouts.append(
             (
                 prompt_format.format(prompt=problem_format.format(problem=item[0])),


@@ -347,9 +347,6 @@ class McpEnv(BaseEnv):
         for score in scores["scores"]:
             self.percent_correct_buffer.append(max(score, 0))
-        if all(scores["scores"][0] == score for score in scores["scores"]):
-            return None
         return scores

     async def get_next_item(self):


@@ -373,10 +373,6 @@ class MCQAThinkingEnv(BaseEnv):
         for score in scores["scores"]:
             self.percent_correct_buffer.append(max(score, 0))
-        # Return None if all scores are the same (no learning signal)
-        if all(scores["scores"][0] == score for score in scores["scores"]):
-            return None
         return scores

     async def rollout_and_score_eval(self, test_item):


@@ -770,10 +770,6 @@ class TextReversalEnv(BaseEnv):
             for score in scores["scores"]:
                 self.percent_correct_buffer.append(max(score, 0))
-            # Return None if all scores are the same (no learning signal)
-            if len(set(scores["scores"])) == 1:
-                return None
             return scores
         except Exception as e:


@@ -413,10 +413,6 @@ class SingleToolCallingEnv(BaseEnv):
             # Apply linear penalty scaling from 1.0 down to 0.0
             scores["scores"].append(1.0 - percentage_of_range)
-        # Check if all scores are the same (no learning signal)
-        if all(scores["scores"][0] == score for score in scores["scores"]):
-            return None
         return scores

     async def get_next_item(self):