(evals): Medium configs (#415)

* updated medium configs

* fix problematic curriculum values / small issues causing exceptions to be raised

* optimus alpha config

* all configs so far

* fix tests
This commit is contained in:
Zafir Stojanovski 2025-04-14 08:25:31 +02:00 committed by GitHub
parent cd1a9ea58b
commit 290bfc4fdd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
25 changed files with 7050 additions and 63 deletions

View file

@@ -99,41 +99,41 @@ def test_rearc_curriculum():
assert base_cfg.size == 50
# Default levels should have weights that select only the easiest tasks
assert base_cfg.pso_difficulty_weights == [1, 0, 0, 0, 0, 0, 0, 0]
assert base_cfg.rng_difficulty_weights == [1, 0, 0, 0, 0, 0, 0, 0]
assert base_cfg.pso_difficulty_weights == [1, 0, 0, 0, 0, 0, 0]
assert base_cfg.rng_difficulty_weights == [1, 0, 0, 0, 0, 0, 0]
# Test incrementing pso_difficulty attribute
curriculum.increment_attr_level("pso_difficulty_weights")
pso_cfg = curriculum.generate_configuration(base_value)
assert pso_cfg.pso_difficulty_weights == [0, 1, 0, 0, 0, 0, 0, 0] # Level 1: second difficulty range
assert pso_cfg.rng_difficulty_weights == [1, 0, 0, 0, 0, 0, 0, 0] # RNG unchanged
assert pso_cfg.pso_difficulty_weights == [0, 1, 0, 0, 0, 0, 0] # Level 1: second difficulty range
assert pso_cfg.rng_difficulty_weights == [1, 0, 0, 0, 0, 0, 0] # RNG unchanged
# Test incrementing rng_difficulty attribute
curriculum.increment_attr_level("rng_difficulty_weights")
rng_cfg = curriculum.generate_configuration(base_value)
assert rng_cfg.pso_difficulty_weights == [0, 1, 0, 0, 0, 0, 0, 0] # PSO unchanged
assert rng_cfg.rng_difficulty_weights == [0, 1, 0, 0, 0, 0, 0, 0] # Level 1: second difficulty range
assert rng_cfg.pso_difficulty_weights == [0, 1, 0, 0, 0, 0, 0] # PSO unchanged
assert rng_cfg.rng_difficulty_weights == [0, 1, 0, 0, 0, 0, 0] # Level 1: second difficulty range
# Test decrementing pso_difficulty attribute
curriculum.decrement_attr_level("pso_difficulty_weights")
decr_cfg = curriculum.generate_configuration(base_value)
assert decr_cfg.pso_difficulty_weights == [1, 0, 0, 0, 0, 0, 0, 0] # Back to level 0
assert decr_cfg.rng_difficulty_weights == [0, 1, 0, 0, 0, 0, 0, 0] # RNG unchanged
assert decr_cfg.pso_difficulty_weights == [1, 0, 0, 0, 0, 0, 0] # Back to level 0
assert decr_cfg.rng_difficulty_weights == [0, 1, 0, 0, 0, 0, 0] # RNG unchanged
# Test global level setting to higher level
curriculum.set_global_level(3) # Set all attributes to level 3
global_cfg = curriculum.generate_configuration(base_value)
assert global_cfg.pso_difficulty_weights == [0, 0, 0, 1, 0, 0, 0, 0] # Level 3
assert global_cfg.rng_difficulty_weights == [0, 0, 0, 1, 0, 0, 0, 0] # Level 3
assert global_cfg.pso_difficulty_weights == [0, 0, 0, 1, 0, 0, 0] # Level 3
assert global_cfg.rng_difficulty_weights == [0, 0, 0, 1, 0, 0, 0] # Level 3
# Test increment global level
curriculum.increment_global_level() # Should go to level 4
incr_global_cfg = curriculum.generate_configuration(base_value)
assert incr_global_cfg.pso_difficulty_weights == [0, 0, 0, 0, 1, 0, 0, 0] # Level 4
assert incr_global_cfg.rng_difficulty_weights == [0, 0, 0, 0, 1, 0, 0, 0] # Level 4
assert incr_global_cfg.pso_difficulty_weights == [0, 0, 0, 0, 1, 0, 0] # Level 4
assert incr_global_cfg.rng_difficulty_weights == [0, 0, 0, 0, 1, 0, 0] # Level 4
# Test decrement global level
curriculum.decrement_global_level() # Should go back to level 3
decr_global_cfg = curriculum.generate_configuration(base_value)
assert decr_global_cfg.pso_difficulty_weights == [0, 0, 0, 1, 0, 0, 0, 0] # Level 3
assert decr_global_cfg.rng_difficulty_weights == [0, 0, 0, 1, 0, 0, 0, 0] # Level 3
assert decr_global_cfg.pso_difficulty_weights == [0, 0, 0, 1, 0, 0, 0] # Level 3
assert decr_global_cfg.rng_difficulty_weights == [0, 0, 0, 1, 0, 0, 0] # Level 3