mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-27 17:23:19 +00:00
(evals): Medium configs (#415)
* updated medium configs * fix problematic curriculum values / small issues causing exceptions to be raised * optimus alpha config * all configs so far * fix tests
This commit is contained in:
parent
cd1a9ea58b
commit
290bfc4fdd
25 changed files with 7050 additions and 63 deletions
|
|
@ -99,41 +99,41 @@ def test_rearc_curriculum():
|
|||
assert base_cfg.size == 50
|
||||
|
||||
# Default levels should have weights that select only the easiest tasks
|
||||
assert base_cfg.pso_difficulty_weights == [1, 0, 0, 0, 0, 0, 0, 0]
|
||||
assert base_cfg.rng_difficulty_weights == [1, 0, 0, 0, 0, 0, 0, 0]
|
||||
assert base_cfg.pso_difficulty_weights == [1, 0, 0, 0, 0, 0, 0]
|
||||
assert base_cfg.rng_difficulty_weights == [1, 0, 0, 0, 0, 0, 0]
|
||||
|
||||
# Test incrementing pso_difficulty attribute
|
||||
curriculum.increment_attr_level("pso_difficulty_weights")
|
||||
pso_cfg = curriculum.generate_configuration(base_value)
|
||||
assert pso_cfg.pso_difficulty_weights == [0, 1, 0, 0, 0, 0, 0, 0] # Level 1: second difficulty range
|
||||
assert pso_cfg.rng_difficulty_weights == [1, 0, 0, 0, 0, 0, 0, 0] # RNG unchanged
|
||||
assert pso_cfg.pso_difficulty_weights == [0, 1, 0, 0, 0, 0, 0] # Level 1: second difficulty range
|
||||
assert pso_cfg.rng_difficulty_weights == [1, 0, 0, 0, 0, 0, 0] # RNG unchanged
|
||||
|
||||
# Test incrementing rng_difficulty attribute
|
||||
curriculum.increment_attr_level("rng_difficulty_weights")
|
||||
rng_cfg = curriculum.generate_configuration(base_value)
|
||||
assert rng_cfg.pso_difficulty_weights == [0, 1, 0, 0, 0, 0, 0, 0] # PSO unchanged
|
||||
assert rng_cfg.rng_difficulty_weights == [0, 1, 0, 0, 0, 0, 0, 0] # Level 1: second difficulty range
|
||||
assert rng_cfg.pso_difficulty_weights == [0, 1, 0, 0, 0, 0, 0] # PSO unchanged
|
||||
assert rng_cfg.rng_difficulty_weights == [0, 1, 0, 0, 0, 0, 0] # Level 1: second difficulty range
|
||||
|
||||
# Test decrementing pso_difficulty attribute
|
||||
curriculum.decrement_attr_level("pso_difficulty_weights")
|
||||
decr_cfg = curriculum.generate_configuration(base_value)
|
||||
assert decr_cfg.pso_difficulty_weights == [1, 0, 0, 0, 0, 0, 0, 0] # Back to level 0
|
||||
assert decr_cfg.rng_difficulty_weights == [0, 1, 0, 0, 0, 0, 0, 0] # RNG unchanged
|
||||
assert decr_cfg.pso_difficulty_weights == [1, 0, 0, 0, 0, 0, 0] # Back to level 0
|
||||
assert decr_cfg.rng_difficulty_weights == [0, 1, 0, 0, 0, 0, 0] # RNG unchanged
|
||||
|
||||
# Test global level setting to higher level
|
||||
curriculum.set_global_level(3) # Set all attributes to level 3
|
||||
global_cfg = curriculum.generate_configuration(base_value)
|
||||
assert global_cfg.pso_difficulty_weights == [0, 0, 0, 1, 0, 0, 0, 0] # Level 3
|
||||
assert global_cfg.rng_difficulty_weights == [0, 0, 0, 1, 0, 0, 0, 0] # Level 3
|
||||
assert global_cfg.pso_difficulty_weights == [0, 0, 0, 1, 0, 0, 0] # Level 3
|
||||
assert global_cfg.rng_difficulty_weights == [0, 0, 0, 1, 0, 0, 0] # Level 3
|
||||
|
||||
# Test increment global level
|
||||
curriculum.increment_global_level() # Should go to level 4
|
||||
incr_global_cfg = curriculum.generate_configuration(base_value)
|
||||
assert incr_global_cfg.pso_difficulty_weights == [0, 0, 0, 0, 1, 0, 0, 0] # Level 4
|
||||
assert incr_global_cfg.rng_difficulty_weights == [0, 0, 0, 0, 1, 0, 0, 0] # Level 4
|
||||
assert incr_global_cfg.pso_difficulty_weights == [0, 0, 0, 0, 1, 0, 0] # Level 4
|
||||
assert incr_global_cfg.rng_difficulty_weights == [0, 0, 0, 0, 1, 0, 0] # Level 4
|
||||
|
||||
# Test decrement global level
|
||||
curriculum.decrement_global_level() # Should go back to level 3
|
||||
decr_global_cfg = curriculum.generate_configuration(base_value)
|
||||
assert decr_global_cfg.pso_difficulty_weights == [0, 0, 0, 1, 0, 0, 0, 0] # Level 3
|
||||
assert decr_global_cfg.rng_difficulty_weights == [0, 0, 0, 1, 0, 0, 0, 0] # Level 3
|
||||
assert decr_global_cfg.pso_difficulty_weights == [0, 0, 0, 1, 0, 0, 0] # Level 3
|
||||
assert decr_global_cfg.rng_difficulty_weights == [0, 0, 0, 1, 0, 0, 0] # Level 3
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue