[eval-v1] async to speed up inference/evaluation

This commit is contained in:
rishabhranawat 2025-02-10 21:35:46 -08:00
parent eb25ab9656
commit be3d04e7cb
5 changed files with 261 additions and 76 deletions

View file

@ -0,0 +1,39 @@
[
{
"dataset_name": "letter_counting",
"model": "google/gemini-2.0-flash-001",
"average_score": 0.10800000000000001,
"total_examples": 10,
"timestamp": "2025-02-10T21:26:40.575060",
"config": {
"min_words": 5,
"max_words": 15,
"size": 10,
"seed": 42
}
},
{
"dataset_name": "propositional_logic",
"model": "google/gemini-2.0-flash-001",
"average_score": 0.059,
"total_examples": 10,
"timestamp": "2025-02-10T21:26:44.955201",
"config": {
"size": 10,
"seed": 42
}
},
{
"dataset_name": "leg_counting",
"model": "google/gemini-2.0-flash-001",
"average_score": 0.40199999999999997,
"total_examples": 10,
"timestamp": "2025-02-10T21:26:45.852518",
"config": {
"min_animals": 3,
"max_animals": 8,
"size": 10,
"seed": 42
}
}
]

View file

@ -0,0 +1,39 @@
[
{
"dataset_name": "letter_counting",
"model": "google/gemini-2.0-flash-001",
"average_score": 0.157,
"total_examples": 10,
"timestamp": "2025-02-10T21:29:18.766288",
"config": {
"min_words": 5,
"max_words": 15,
"size": 10,
"seed": 42
}
},
{
"dataset_name": "propositional_logic",
"model": "google/gemini-2.0-flash-001",
"average_score": 0.059,
"total_examples": 10,
"timestamp": "2025-02-10T21:29:24.026918",
"config": {
"size": 10,
"seed": 42
}
},
{
"dataset_name": "leg_counting",
"model": "google/gemini-2.0-flash-001",
"average_score": 0.40199999999999997,
"total_examples": 10,
"timestamp": "2025-02-10T21:29:23.650182",
"config": {
"min_animals": 3,
"max_animals": 8,
"size": 10,
"seed": 42
}
}
]

View file

@ -0,0 +1,39 @@
[
{
"dataset_name": "letter_counting",
"model": "google/gemini-2.0-flash-001",
"average_score": 0.157,
"total_examples": 10,
"timestamp": "2025-02-10T21:33:46.747429",
"config": {
"min_words": 5,
"max_words": 15,
"size": 10,
"seed": 42
}
},
{
"dataset_name": "propositional_logic",
"model": "google/gemini-2.0-flash-001",
"average_score": 0.059,
"total_examples": 10,
"timestamp": "2025-02-10T21:33:51.422633",
"config": {
"size": 10,
"seed": 42
}
},
{
"dataset_name": "leg_counting",
"model": "google/gemini-2.0-flash-001",
"average_score": 0.40199999999999997,
"total_examples": 10,
"timestamp": "2025-02-10T21:33:52.022623",
"config": {
"min_animals": 3,
"max_animals": 8,
"size": 10,
"seed": 42
}
}
]

View file

@ -0,0 +1,39 @@
[
{
"dataset_name": "letter_counting",
"model": "google/gemini-2.0-flash-001",
"average_score": 0.157,
"total_examples": 10,
"timestamp": "2025-02-10T21:34:13.347168",
"config": {
"min_words": 5,
"max_words": 15,
"size": 10,
"seed": 42
}
},
{
"dataset_name": "propositional_logic",
"model": "google/gemini-2.0-flash-001",
"average_score": 0.10800000000000001,
"total_examples": 10,
"timestamp": "2025-02-10T21:34:18.146056",
"config": {
"size": 10,
"seed": 42
}
},
{
"dataset_name": "leg_counting",
"model": "google/gemini-2.0-flash-001",
"average_score": 0.40199999999999997,
"total_examples": 10,
"timestamp": "2025-02-10T21:34:18.315364",
"config": {
"min_animals": 3,
"max_animals": 8,
"size": 10,
"seed": 42
}
}
]