Include configured (min, max) ranges rather than sampled values in difficulty metadata dicts (#387)

* update difficulty metadata for logic datasets

* update difficulty metadata for graph datasets

* update difficulty metadata for geometry datasets

* update difficulty metadata for games datasets

* update difficulty metadata for cognition datasets

* update difficulty metadata for arithmetic datasets

* update difficulty metadata for arc datasets

* update difficulty metadata for algorithmic datasets

* update difficulty metadata for algebra datasets

* use tuples

* update tests

* update tests
This commit is contained in:
Oliver Stanley 2025-03-20 09:27:03 +00:00 committed by GitHub
parent b69c35818a
commit 7475a20700
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
80 changed files with 304 additions and 126 deletions

View file

@ -96,7 +96,12 @@ class BasicArithmeticDataset(ProceduralDataset):
"answer": str(result),
"metadata": {
"expression": expression,
"difficulty": {"num_terms": num_terms, "num_digits": num_digits},
"num_terms": num_terms,
"num_digits": num_digits,
"difficulty": {
"num_terms": (self.config.min_terms, self.config.max_terms),
"num_digits": (self.config.min_digits, self.config.max_digits),
},
},
}

View file

@ -64,11 +64,13 @@ class ChainSumDataset(ProceduralDataset):
"question": f"State the final answer to the following arithmetic problem: {expression} =",
"answer": str(result),
"metadata": {
"difficulty": {
"num_terms": num_terms,
"num_digits": num_digits,
},
"num_terms": num_terms,
"num_digits": num_digits,
"expression": expression,
"difficulty": {
"num_terms": (self.config.min_terms, self.config.max_terms),
"num_digits": (self.config.min_digits, self.config.max_digits),
},
},
}

View file

@ -46,7 +46,10 @@ class CountBitsDataset(ProceduralDataset):
"number": number,
"solution": answer,
"binary": binary,
"difficulty": {"n": number},
"n": number,
"difficulty": {
"n": (self.config.min_n, self.config.max_n),
},
},
}

View file

@ -189,9 +189,11 @@ class DecimalArithmeticDataset(ProceduralDataset):
"question": problem_str,
"answer": str(answer),
"metadata": {
"decimal_places": decimal_places,
"num_terms": terms,
"difficulty": {
"decimal_places": decimal_places,
"num_terms": terms,
"decimal_places": (self.config.min_num_decimal_places, self.config.max_num_decimal_places),
"num_terms": (self.config.min_terms, self.config.max_terms),
},
},
}

View file

@ -66,11 +66,14 @@ class DecimalChainSumDataset(ProceduralDataset):
"question": f"State the final answer to the following arithmetic problem: {expression} =",
"answer": str(result),
"metadata": {
"difficulty": {
"num_terms": num_terms,
"num_digits": num_digits,
},
"num_terms": num_terms,
"num_digits": num_digits,
"expression": expression,
"difficulty": {
"num_terms": (self.config.min_terms, self.config.max_terms),
"num_digits": (self.config.min_digits, self.config.max_digits),
"decimal_places": (self.config.min_decimal_places, self.config.max_decimal_places),
},
},
}

View file

@ -124,11 +124,11 @@ class DiceDataset(ProceduralDataset):
"question": puzzle_str,
"answer": answer_str,
"metadata": {
"puzzle": puzzle,
"difficulty": {
"num_dice": self.config.num_dice,
"max_dice_size": self.config.max_dice_size,
},
"puzzle": puzzle,
},
}

View file

@ -120,9 +120,10 @@ class FractionSimplificationDataset(ProceduralDataset):
"simplified_denominator": simple_den,
"reduction_factor": num // simple_num, # Will be same as den // simple_den
"style": style,
"factor": factor,
"difficulty": {
"factor": factor,
"value": (simple_num, simple_den),
"value": (self.config.min_value, self.config.max_value),
"factor": (self.config.min_factor, self.config.max_factor),
},
},
}

View file

@ -64,9 +64,10 @@ class GCDDataset(ProceduralDataset):
"metadata": {
"numbers": numbers,
"result": result,
"num_terms": num_terms,
"difficulty": {
"num_terms": num_terms,
"max_value": self.config.max_value,
"num_terms": (self.config.min_numbers, self.config.max_numbers),
"max_value": (self.config.min_value, self.config.max_value),
},
},
}

View file

@ -67,7 +67,7 @@ class LCMDataset(ProceduralDataset):
"numbers": numbers,
"result": result,
"difficulty": {
"numbers": len(numbers),
"numbers": (self.config.min_numbers, self.config.max_numbers),
"value": (self.config.min_value, self.config.max_value),
},
},

View file

@ -118,11 +118,13 @@ class LegCountingDataset(ProceduralDataset):
"question": QUESTION_TEMPLATE.format(animals=", ".join(animal_list)),
"answer": str(total_legs),
"metadata": {
"difficulty": {
"num_animals": len(animals),
},
"animals": animals,
"num_animals": len(animals),
"total_legs": total_legs,
"difficulty": {
"num_animals": (self.config.min_animals, self.config.max_animals),
"num_instances": (self.config.min_instances, self.config.max_instances),
},
},
}

View file

@ -98,8 +98,9 @@ class NumberFormatDataset(ProceduralDataset):
"solution": answer,
"formatted_candidates": formatted_candidates,
"size": size,
"num_candidates": num_candidates,
"difficulty": {
"num_candidates": num_candidates,
"num_candidates": (self.config.min_num_candidates, self.config.max_num_candidates),
"n": (self.config.min_n, self.config.max_n),
"min_delta": self.config.max_delta,
},

View file

@ -73,7 +73,14 @@ class PowerFunctionDataset(ProceduralDataset):
return {
"question": QUESTION_TEMPLATE.format(base=base, exponent=exponent),
"answer": str(answer),
"metadata": {"base": base, "exponent": exponent, "solution": answer, "difficulty": {"exponent": exponent}},
"metadata": {
"base": base,
"exponent": exponent,
"solution": answer,
"difficulty": {
"exponent": (self.config.min_exponent, self.config.max_exponent),
},
},
}

View file

@ -83,7 +83,13 @@ class PrimeFactorizationDataset(ProceduralDataset):
f"(Example: for 12 the answer would be: 2 × 2 × 3)"
),
"answer": answer,
"metadata": {"number": number, "factors": factors, "difficulty": {"value": number}},
"metadata": {
"number": number,
"factors": factors,
"difficulty": {
"value": (self.config.min_value, self.config.max_value),
},
},
}

View file

@ -66,11 +66,13 @@ class ProductsDataset(ProceduralDataset):
"question": f"Solve the following multiplication: {expression}. Give only the result as your final answer.",
"answer": str(result),
"metadata": {
"difficulty": {
"num_terms": num_terms,
"num_digits": num_digits,
},
"expression": expression,
"num_terms": num_terms,
"num_digits": num_digits,
"difficulty": {
"num_terms": (self.config.min_terms, self.config.max_terms),
"num_digits": (self.config.min_digits, self.config.max_digits),
},
},
}