diff --git a/reasoning_gym/games/countdown.py b/reasoning_gym/games/countdown.py index edec6c38..cbaaa39d 100644 --- a/reasoning_gym/games/countdown.py +++ b/reasoning_gym/games/countdown.py @@ -121,14 +121,24 @@ class CountdownDataset(ProceduralDataset): # Substitute actual numbers to get target subs = {sym: num for sym, num in zip(syms, numbers)} - target = int(expr.subs(subs)) - - # Convert to string expression - expr_str = str(expr) - for i, sym in enumerate(syms): - expr_str = expr_str.replace(str(sym), str(numbers[i])) - - return expr_str, numbers, target + try: + target = int(expr.subs(subs)) + + # Convert to string expression + expr_str = str(expr) + for i, sym in enumerate(syms): + expr_str = expr_str.replace(str(sym), str(numbers[i])) + + # Ensure target is within bounds + if self.config.min_target <= target <= self.config.max_target: + return expr_str, numbers, target + + # If target out of bounds, try again with new expression + return self._generate_expression(rng) + + except (ValueError, ZeroDivisionError): + # If evaluation fails, try again with new expression + return self._generate_expression(rng) # Register the dataset diff --git a/tests/test_countdown.py b/tests/test_countdown.py index d015d143..365273a6 100644 --- a/tests/test_countdown.py +++ b/tests/test_countdown.py @@ -61,6 +61,14 @@ def test_countdown_game_items(): # Verify all numbers are within config range assert all(config.min_value <= n <= config.max_value for n in item["metadata"]["numbers"]) + + # Verify expression evaluates correctly + expr = item["metadata"]["expression"] + try: + result = eval(expr) # Safe here since we control expression generation + assert result == item["metadata"]["target"] + except (SyntaxError, ZeroDivisionError): + pytest.fail(f"Invalid expression generated: {expr}") def test_countdown_game_randomization():