include ranges rather than sampled values in difficulty metadata dicts (#387)

* update difficulty metadata for logic datasets

* update difficulty metadata for graph datasets

* update difficulty metadata for geometry datasets

* update difficulty metadata for games datasets

* update difficulty metadata for cognition datasets

* update difficulty metadata for arithmetic datasets

* update difficulty metadata for arc datasets

* update difficulty metadata for algorithmic datasets

* update difficulty metadata for algebra datasets

* use tuples

* update tests

* update tests
This commit is contained in:
Oliver Stanley 2025-03-20 09:27:03 +00:00 committed by GitHub
parent b69c35818a
commit 7475a20700
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
80 changed files with 304 additions and 126 deletions

View file

@@ -53,11 +53,15 @@ def test_coach_with_chain_sum():
# Each key should be a tuple of tuples containing difficulty parameters
for key in aggregated.scores:
assert isinstance(key, tuple)
# Each inner tuple should be (param_name, value)
# Each inner tuple should be (param_name, value) or (param_name, (min_value, max_value))
for param in key:
assert isinstance(param, tuple)
assert param[0] in ("num_terms", "num_digits")
assert isinstance(param[1], int)
assert (
isinstance(param[1], int)
or (isinstance(param[1], tuple) and len(param[1]) == 2)
and all(isinstance(v, int) for v in param[1])
)
# Test aggregation with last_n
last_3 = coach.score_board.aggregate(last_n=3)
@@ -171,7 +175,7 @@ def test_coach_with_composite():
item = coach[i + 5] # Use different indices
if "chain_sum" in item["metadata"]["source_dataset"]:
metadata = item["metadata"]
assert metadata["difficulty"]["num_terms"] >= 4
assert metadata["num_terms"] >= 4
def test_grouped_scores_str():