Revert "rm hardcoded same score check"

This reverts commit f02c24204d.
This commit is contained in:
Partho Das 2026-03-10 01:42:44 +05:30
parent cdc23ba5dc
commit 632ab0161c
22 changed files with 85 additions and 0 deletions

View file

@@ -3653,6 +3653,14 @@ class AnswerFormatEnv(BaseEnv):
)
await self._save_failed_rollouts_to_jsonl()
# Check if all scores are the same (no learning signal)
if all(group_scores[0] == score for score in group_scores):
if self.debug_logging:
self.logger.debug(
"All scores are identical, returning None for learning signal"
)
return None
# Track successful groups for equivalent ratio enforcement
if self.ensure_equivalent_ratios:
# Count this as a successful group if we have any successful examples