(evals): Medium configs (#415)

* updated medium configs

* fix problematic curriculum values / small issues causing exceptions to be raised

* optimus alpha config

* all configs so far

* fix tests
This commit is contained in:
Zafir Stojanovski 2025-04-14 08:25:31 +02:00 committed by GitHub
parent cd1a9ea58b
commit 290bfc4fdd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
25 changed files with 7050 additions and 63 deletions

View file

@@ -109,7 +109,7 @@ categories:
- dataset: jugs
params:
num_jugs: 4
difficulty: 50
difficulty: 10
- dataset: letter_counting
params:
min_words: 25
@@ -152,10 +152,10 @@ categories:
max_length: 100
- dataset: palindrome_partitioning
params:
min_string_len: 50
max_string_len: 100
min_substring_palindrome_len: 5
max_substring_palindrome_len: 10
min_string_len: 5
max_string_len: 15
min_substring_palindrome_len: 1
max_substring_palindrome_len: 5
- dataset: pool_matrix
params:
min_rows: 25
@@ -234,8 +234,8 @@ categories:
mirrors_weights: [0.2, 0.2, 0.2, 0.2, 0.2]
- dataset: rearc
params:
pso_difficulty_weights: [0, 0, 0, 1, 0, 0, 0, 0]
rng_difficulty_weights: [0, 0, 0, 1, 0, 0, 0, 0]
pso_difficulty_weights: [0, 0, 0, 1, 0, 0, 0]
rng_difficulty_weights: [0, 0, 0, 1, 0, 0, 0]
- category: arithmetic
datasets:
- dataset: basic_arithmetic
@@ -361,8 +361,8 @@ categories:
max_num_statements: 500
- dataset: number_sequence
params:
min_terms: 8
max_terms: 12
min_terms: 5
max_terms: 10
min_value: -500
max_value: 500
max_complexity: 3
@@ -378,16 +378,16 @@ categories:
datasets:
- dataset: countdown
params:
min_numbers: 6
min_numbers: 3
max_numbers: 9
min_target: 100
max_target: 1000
min_value: 1
max_value: 250
max_value: 100
- dataset: emoji_mystery
params:
min_words_in_sentence: 20
max_words_in_sentence: 40
min_words_in_sentence: 10
max_words_in_sentence: 30
- dataset: futoshiki
params:
min_board_size: 6
@@ -410,8 +410,8 @@ categories:
params:
min_grid_size: 25
max_grid_size: 50
min_dist: 25
max_dist: 50
min_dist: 10
max_dist: 15
- dataset: mini_sudoku
params:
min_empty: 6