diff --git a/environments/answer_format_environment/answer_format_environment.py b/environments/answer_format_environment/answer_format_environment.py index 949f8f9e..c84d53c7 100644 --- a/environments/answer_format_environment/answer_format_environment.py +++ b/environments/answer_format_environment/answer_format_environment.py @@ -3653,14 +3653,6 @@ class AnswerFormatEnv(BaseEnv): ) await self._save_failed_rollouts_to_jsonl() - # Check if all scores are the same (no learning signal) - if all(group_scores[0] == score for score in group_scores): - if self.debug_logging: - self.logger.debug( - "All scores are identical, returning None for learning signal" - ) - return None - # Track successful groups for equivalent ratio enforcement if self.ensure_equivalent_ratios: # Count this as a successful group if we have any successful examples diff --git a/environments/community/cat_behavior_env/catbot_arena.py b/environments/community/cat_behavior_env/catbot_arena.py index 1b2d69fd..8de5a843 100644 --- a/environments/community/cat_behavior_env/catbot_arena.py +++ b/environments/community/cat_behavior_env/catbot_arena.py @@ -285,8 +285,6 @@ class GSM8kEnv(BaseEnv): percentage_of_range = min(percentage_of_range, 1.0) # Apply linear penalty scaling from 1.0 down to 0.0 scores["scores"].append(1.0 - percentage_of_range) - if all([scores["scores"][0] == score for score in scores["scores"]]): - return None # If all the same, we return None return scores else: # If the gold solution is not parseable, we return None diff --git a/environments/community/dynastai/dynastai_server.py b/environments/community/dynastai/dynastai_server.py index 1ea3af05..0678e96e 100755 --- a/environments/community/dynastai/dynastai_server.py +++ b/environments/community/dynastai/dynastai_server.py @@ -503,11 +503,6 @@ class DynastAIEnv(BaseEnv): for score in scores["scores"]: self.percent_correct_buffer.append(max(score, 0)) - # Check if all the same - if all([score == scores["scores"][0] for score in scores["scores"]]): - print("[DYNASTAI] All scores identical, returning None") - return None # If all the same, we return None - return scores async def get_next_item(self) -> DynastAIRow: diff --git a/environments/community/mcp_tool_calling/tool_calling_server.py b/environments/community/mcp_tool_calling/tool_calling_server.py index 3c765c72..57d2cd56 100644 --- a/environments/community/mcp_tool_calling/tool_calling_server.py +++ b/environments/community/mcp_tool_calling/tool_calling_server.py @@ -404,10 +404,6 @@ class SingleToolCallingEnv(BaseEnv): # Apply linear penalty scaling from 1.0 down to 0.0 scores["scores"].append(1.0 - percentage_of_range) - # Check if all scores are the same (no learning signal) - if all(scores["scores"][0] == score for score in scores["scores"]): - return None - return scores async def get_next_item(self): diff --git a/environments/community/options_iv_prediction/options_iv_prediction.py b/environments/community/options_iv_prediction/options_iv_prediction.py index 16d7b9fe..d771516e 100644 --- a/environments/community/options_iv_prediction/options_iv_prediction.py +++ b/environments/community/options_iv_prediction/options_iv_prediction.py @@ -433,10 +433,6 @@ class OptionsIVPrediction(BaseEnv): if len(scores["tokens"]) >= self.config.group_size: break - # Return None if all scores are the same (no learning signal) - if all(scores["scores"][0] == score for score in scores["scores"]): - return None - return scores async def rollout_and_score_eval(self, test_item): diff --git a/environments/community/pay_to_play/pay_to_play_env.py b/environments/community/pay_to_play/pay_to_play_env.py index 1eacdb8b..d9760d41 100644 --- a/environments/community/pay_to_play/pay_to_play_env.py +++ b/environments/community/pay_to_play/pay_to_play_env.py @@ -777,10 +777,6 @@ End your evaluation with \\boxed{{score}} where score is your numerical rating. if hasattr(self, "last_agent_card_feedback"): self.last_agent_card_feedback = agent_card_feedback - # Ensure we have different scores for training signal - if len(set(scores["scores"])) == 1: - return None - return scores def _extract_score_from_agent_card(self, agent_card_response: str) -> float: diff --git a/environments/community/physical_space_stl/physical_env.py b/environments/community/physical_space_stl/physical_env.py index 7cedb2ea..91c56672 100644 --- a/environments/community/physical_space_stl/physical_env.py +++ b/environments/community/physical_space_stl/physical_env.py @@ -409,9 +409,6 @@ class PhysicalEnv(BaseEnv): percentage_of_range = min(percentage_of_range, 1.0) scores["scores"].append(1.0 - percentage_of_range) - if all([scores["scores"][0] == score for score in scores["scores"]]): - return None # If all the same, we return None - return scores async def get_next_item(self) -> PhysicalRow: diff --git a/environments/community/regex_generation/regex_env.py b/environments/community/regex_generation/regex_env.py index baa16e59..30fe311b 100644 --- a/environments/community/regex_generation/regex_env.py +++ b/environments/community/regex_generation/regex_env.py @@ -248,10 +248,6 @@ class RegexEnv(BaseEnv): 1.0 if s >= self.config.score_threshold else 0.0 ) - # If all scores identical, no learning signal - if len(set(scores["scores"])) == 1: - return None - return scores async def rollout_and_score_eval(self, problem: dict) -> dict: diff --git a/environments/community/solitaire_winning_probability/solitaire_server.py b/environments/community/solitaire_winning_probability/solitaire_server.py index e38f02f8..c106d067 100644 --- a/environments/community/solitaire_winning_probability/solitaire_server.py +++ b/environments/community/solitaire_winning_probability/solitaire_server.py @@ -377,8 +377,6 @@ class SolitaireEnv(BaseEnv): percentage_of_range = min(percentage_of_range, 1.0) # Apply linear penalty scaling from 1.0 down to 0.0 scores["scores"].append(1.0 - percentage_of_range) - if all([scores["scores"][0] == score for score in scores["scores"]]): - return None # If all the same, we return None return scores else: # If the gold solution is not parseable, we return None diff --git a/environments/community/sql_query_env/sql_query_env.py b/environments/community/sql_query_env/sql_query_env.py index 1e2a75f2..8f86e4c8 100644 --- a/environments/community/sql_query_env/sql_query_env.py +++ b/environments/community/sql_query_env/sql_query_env.py @@ -408,9 +408,6 @@ class SQLQueryEnv(BaseEnv): percentage_of_range = min(percentage_of_range, 1.0) scores["scores"].append(1.0 - percentage_of_range) - if all([scores["scores"][0] == score for score in scores["scores"]]): - return None # If all the same, return None - return scores async def get_next_item(self) -> WikiSQLRow: diff --git a/environments/community/wikipedia_research/tool_calling_server.py b/environments/community/wikipedia_research/tool_calling_server.py index e14bc717..e51cba19 100644 --- a/environments/community/wikipedia_research/tool_calling_server.py +++ b/environments/community/wikipedia_research/tool_calling_server.py @@ -404,10 +404,6 @@ class SingleToolCallingEnv(BaseEnv): # Apply linear penalty scaling from 1.0 down to 0.0 scores["scores"].append(1.0 - percentage_of_range) - # Check if all scores are the same (no learning signal) - if all(scores["scores"][0] == score for score in scores["scores"]): - return None - return scores async def get_next_item(self): diff --git a/environments/eval_environments/pairwise_judgement_environment.py b/environments/eval_environments/pairwise_judgement_environment.py index 06afae54..6fc894b3 100644 --- a/environments/eval_environments/pairwise_judgement_environment.py +++ b/environments/eval_environments/pairwise_judgement_environment.py @@ -1112,10 +1112,6 @@ class PairwiseJudgementEnv(BaseEnv): for score in scores["scores"]: self.percent_correct_buffer.append(max(score, 0)) - # Return None if all scores are the same (no learning signal) - if len(set(scores["scores"])) == 1: - return None - return scores except Exception as e: diff --git a/environments/fundamental_prediction_environment.py b/environments/fundamental_prediction_environment.py index 9c359306..eeaaed98 100644 --- a/environments/fundamental_prediction_environment.py +++ b/environments/fundamental_prediction_environment.py @@ -402,10 +402,6 @@ class FundamentalPredictionEnv(BaseEnv): if len(scores["tokens"]) >= self.config.group_size: break - # Return None if all scores are the same (no learning signal) - if all(scores["scores"][0] == score for score in scores["scores"]): - return None - return scores async def rollout_and_score_eval(self, test_item): diff --git a/environments/gsm8k_server.py b/environments/gsm8k_server.py index 6ae5285b..8b3589da 100644 --- a/environments/gsm8k_server.py +++ b/environments/gsm8k_server.py @@ -352,8 +352,6 @@ class GSM8kEnv(BaseEnv): percentage_of_range = min(percentage_of_range, 1.0) # Apply linear penalty scaling from 1.0 down to 0.0 scores["scores"].append(1.0 - percentage_of_range) - if all([scores["scores"][0] == score for score in scores["scores"]]): - return None # If all the same, we return None return scores else: # If the gold solution is not parseable, we return None diff --git a/environments/gsm8k_server_axolotl.py b/environments/gsm8k_server_axolotl.py index 244861f8..571eb609 100644 --- a/environments/gsm8k_server_axolotl.py +++ b/environments/gsm8k_server_axolotl.py @@ -283,8 +283,6 @@ class GSM8kEnv(BaseEnv): percentage_of_range = min(percentage_of_range, 1.0) # Apply linear penalty scaling from 1.0 down to 0.0 scores["scores"].append(1.0 - percentage_of_range) - if all([scores["scores"][0] == score for score in scores["scores"]]): - return None # If all the same, we return None return scores else: # If the gold solution is not parseable, we return None diff --git a/environments/letter_counting_environment/letter_counting_environment.py b/environments/letter_counting_environment/letter_counting_environment.py index 2398373b..6f4f9cf1 100644 --- a/environments/letter_counting_environment/letter_counting_environment.py +++ b/environments/letter_counting_environment/letter_counting_environment.py @@ -1248,13 +1248,6 @@ class LetterCountingEnv(BaseEnv): ) return None - # Skip if all scores are identical (no learning signal) - if all(scores["scores"][0] == score for score in scores["scores"]): - self.logger.debug( - f"All scores identical ({scores['scores'][0]:.2f}) - skipping group" - ) - return None - return scores async def rollout_and_score_eval(self, eval_item: Dict) -> Tuple[int, int]: diff --git a/environments/math_server.py b/environments/math_server.py index 5b343b9f..b0dafbfd 100644 --- a/environments/math_server.py +++ b/environments/math_server.py @@ -534,8 +534,6 @@ class MathEnv(BaseEnv): return None, to_backlog else: return None, to_backlog - else: - return None, to_backlog else: self.normal_rollouts.append( ( @@ -1167,8 +1165,6 @@ class MathEnv(BaseEnv): "Max message delta is less than 0.1 * shortest message, no length penalty" ) return None, [] - elif all([score == scores["scores"][0] for score in scores["scores"]]): - return None, [] if len(for_table) > 0: self.judge_rollouts.append(for_table) if len(self.judge_rollouts) >= self.config.num_rollouts_to_keep: diff --git a/environments/math_server_zero.py b/environments/math_server_zero.py index 1432ab4d..43cd6a09 100644 --- a/environments/math_server_zero.py +++ b/environments/math_server_zero.py @@ -383,10 +383,6 @@ class MathEnv(BaseEnv): to_postprocess = await self.score(to_score) if to_postprocess is None: return None, to_backlog - if all( - [to_postprocess["scores"][0] == score for score in to_postprocess["scores"]] - ): - return None, to_backlog self.normal_rollouts.append( ( prompt_format.format(prompt=problem_format.format(problem=item[0])), diff --git a/environments/mcp_env.py b/environments/mcp_env.py index a8d621bc..f8892cb1 100644 --- a/environments/mcp_env.py +++ b/environments/mcp_env.py @@ -347,9 +347,6 @@ class McpEnv(BaseEnv): for score in scores["scores"]: self.percent_correct_buffer.append(max(score, 0)) - if all(scores["scores"][0] == score for score in scores["scores"]): - return None - return scores async def get_next_item(self): diff --git a/environments/mcqa_thinking_env.py b/environments/mcqa_thinking_env.py index 417822f6..9738a85e 100644 --- a/environments/mcqa_thinking_env.py +++ b/environments/mcqa_thinking_env.py @@ -373,10 +373,6 @@ class MCQAThinkingEnv(BaseEnv): for score in scores["scores"]: self.percent_correct_buffer.append(max(score, 0)) - # Return None if all scores are the same (no learning signal) - if all(scores["scores"][0] == score for score in scores["scores"]): - return None - return scores async def rollout_and_score_eval(self, test_item): diff --git a/environments/text_reversal_environment.py b/environments/text_reversal_environment.py index 0b41e2ec..738cb663 100644 --- a/environments/text_reversal_environment.py +++ b/environments/text_reversal_environment.py @@ -770,10 +770,6 @@ class TextReversalEnv(BaseEnv): for score in scores["scores"]: self.percent_correct_buffer.append(max(score, 0)) - # Return None if all scores are the same (no learning signal) - if len(set(scores["scores"])) == 1: - return None - return scores except Exception as e: diff --git a/environments/tool_calling_server.py b/environments/tool_calling_server.py index 10409417..e6824118 100644 --- a/environments/tool_calling_server.py +++ b/environments/tool_calling_server.py @@ -413,10 +413,6 @@ class SingleToolCallingEnv(BaseEnv): # Apply linear penalty scaling from 1.0 down to 0.0 scores["scores"].append(1.0 - percentage_of_range) - # Check if all scores are the same (no learning signal) - if all(scores["scores"][0] == score for score in scores["scores"]): - return None - return scores async def get_next_item(self):