mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-28 17:29:39 +00:00
[eval-v1] async to speed up inference/evaluation
This commit is contained in:
parent
eb25ab9656
commit
be3d04e7cb
5 changed files with 261 additions and 76 deletions
|
|
@ -0,0 +1,39 @@
|
|||
[
|
||||
{
|
||||
"dataset_name": "letter_counting",
|
||||
"model": "google/gemini-2.0-flash-001",
|
||||
"average_score": 0.10800000000000001,
|
||||
"total_examples": 10,
|
||||
"timestamp": "2025-02-10T21:26:40.575060",
|
||||
"config": {
|
||||
"min_words": 5,
|
||||
"max_words": 15,
|
||||
"size": 10,
|
||||
"seed": 42
|
||||
}
|
||||
},
|
||||
{
|
||||
"dataset_name": "propositional_logic",
|
||||
"model": "google/gemini-2.0-flash-001",
|
||||
"average_score": 0.059,
|
||||
"total_examples": 10,
|
||||
"timestamp": "2025-02-10T21:26:44.955201",
|
||||
"config": {
|
||||
"size": 10,
|
||||
"seed": 42
|
||||
}
|
||||
},
|
||||
{
|
||||
"dataset_name": "leg_counting",
|
||||
"model": "google/gemini-2.0-flash-001",
|
||||
"average_score": 0.40199999999999997,
|
||||
"total_examples": 10,
|
||||
"timestamp": "2025-02-10T21:26:45.852518",
|
||||
"config": {
|
||||
"min_animals": 3,
|
||||
"max_animals": 8,
|
||||
"size": 10,
|
||||
"seed": 42
|
||||
}
|
||||
}
|
||||
]
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
[
|
||||
{
|
||||
"dataset_name": "letter_counting",
|
||||
"model": "google/gemini-2.0-flash-001",
|
||||
"average_score": 0.157,
|
||||
"total_examples": 10,
|
||||
"timestamp": "2025-02-10T21:29:18.766288",
|
||||
"config": {
|
||||
"min_words": 5,
|
||||
"max_words": 15,
|
||||
"size": 10,
|
||||
"seed": 42
|
||||
}
|
||||
},
|
||||
{
|
||||
"dataset_name": "propositional_logic",
|
||||
"model": "google/gemini-2.0-flash-001",
|
||||
"average_score": 0.059,
|
||||
"total_examples": 10,
|
||||
"timestamp": "2025-02-10T21:29:24.026918",
|
||||
"config": {
|
||||
"size": 10,
|
||||
"seed": 42
|
||||
}
|
||||
},
|
||||
{
|
||||
"dataset_name": "leg_counting",
|
||||
"model": "google/gemini-2.0-flash-001",
|
||||
"average_score": 0.40199999999999997,
|
||||
"total_examples": 10,
|
||||
"timestamp": "2025-02-10T21:29:23.650182",
|
||||
"config": {
|
||||
"min_animals": 3,
|
||||
"max_animals": 8,
|
||||
"size": 10,
|
||||
"seed": 42
|
||||
}
|
||||
}
|
||||
]
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
[
|
||||
{
|
||||
"dataset_name": "letter_counting",
|
||||
"model": "google/gemini-2.0-flash-001",
|
||||
"average_score": 0.157,
|
||||
"total_examples": 10,
|
||||
"timestamp": "2025-02-10T21:33:46.747429",
|
||||
"config": {
|
||||
"min_words": 5,
|
||||
"max_words": 15,
|
||||
"size": 10,
|
||||
"seed": 42
|
||||
}
|
||||
},
|
||||
{
|
||||
"dataset_name": "propositional_logic",
|
||||
"model": "google/gemini-2.0-flash-001",
|
||||
"average_score": 0.059,
|
||||
"total_examples": 10,
|
||||
"timestamp": "2025-02-10T21:33:51.422633",
|
||||
"config": {
|
||||
"size": 10,
|
||||
"seed": 42
|
||||
}
|
||||
},
|
||||
{
|
||||
"dataset_name": "leg_counting",
|
||||
"model": "google/gemini-2.0-flash-001",
|
||||
"average_score": 0.40199999999999997,
|
||||
"total_examples": 10,
|
||||
"timestamp": "2025-02-10T21:33:52.022623",
|
||||
"config": {
|
||||
"min_animals": 3,
|
||||
"max_animals": 8,
|
||||
"size": 10,
|
||||
"seed": 42
|
||||
}
|
||||
}
|
||||
]
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
[
|
||||
{
|
||||
"dataset_name": "letter_counting",
|
||||
"model": "google/gemini-2.0-flash-001",
|
||||
"average_score": 0.157,
|
||||
"total_examples": 10,
|
||||
"timestamp": "2025-02-10T21:34:13.347168",
|
||||
"config": {
|
||||
"min_words": 5,
|
||||
"max_words": 15,
|
||||
"size": 10,
|
||||
"seed": 42
|
||||
}
|
||||
},
|
||||
{
|
||||
"dataset_name": "propositional_logic",
|
||||
"model": "google/gemini-2.0-flash-001",
|
||||
"average_score": 0.10800000000000001,
|
||||
"total_examples": 10,
|
||||
"timestamp": "2025-02-10T21:34:18.146056",
|
||||
"config": {
|
||||
"size": 10,
|
||||
"seed": 42
|
||||
}
|
||||
},
|
||||
{
|
||||
"dataset_name": "leg_counting",
|
||||
"model": "google/gemini-2.0-flash-001",
|
||||
"average_score": 0.40199999999999997,
|
||||
"total_examples": 10,
|
||||
"timestamp": "2025-02-10T21:34:18.315364",
|
||||
"config": {
|
||||
"min_animals": 3,
|
||||
"max_animals": 8,
|
||||
"size": 10,
|
||||
"seed": 42
|
||||
}
|
||||
}
|
||||
]
|
||||
Loading…
Add table
Add a link
Reference in a new issue