include ranges rather than sampled values in difficulty metadata dicts (#387)

* update difficulty metadata for logic datasets
* update difficulty metadata for graph datasets
* update difficulty metadata for geometry datasets
* update difficulty metadata for games datasets
* update difficulty metadata for cognition datasets
* update difficulty metadata for arithmetic datasets
* update difficulty metadata for arc datasets
* update difficulty metadata for algorithmic datasets
* update difficulty metadata for algebra datasets
* use tuples
* update tests
* update tests
2026-04-19 12:58:07 +00:00 · 2025-03-20 09:27:03 +00:00 · 2025-03-20 09:27:03 +00:00 · 7475a20700
commit 7475a20700
parent b69c35818a
80 changed files with 304 additions and 126 deletions
--- a/tests/test_coaching.py
+++ b/tests/test_coaching.py
@@ -53,11 +53,15 @@ def test_coach_with_chain_sum():
    # Each key should be a tuple of tuples containing difficulty parameters
    for key in aggregated.scores:
        assert isinstance(key, tuple)
-        # Each inner tuple should be (param_name, value)
+        # Each inner tuple should be (param_name, value) or (param_name, (min_value, max_value))
        for param in key:
            assert isinstance(param, tuple)
            assert param[0] in ("num_terms", "num_digits")
-            assert isinstance(param[1], int)
+            assert (
+                isinstance(param[1], int)
+                or (isinstance(param[1], tuple) and len(param[1]) == 2)
+                and all(isinstance(v, int) for v in param[1])
+            )

    # Test aggregation with last_n
    last_3 = coach.score_board.aggregate(last_n=3)
@@ -171,7 +175,7 @@ def test_coach_with_composite():
        item = coach[i + 5]  # Use different indices
        if "chain_sum" in item["metadata"]["source_dataset"]:
            metadata = item["metadata"]
-            assert metadata["difficulty"]["num_terms"] >= 4
+            assert metadata["num_terms"] >= 4


 def test_grouped_scores_str():