[eval-v1] add timer

This commit is contained in:
rishabhranawat 2025-02-10 21:48:44 -08:00
parent 247464a47d
commit df5438498e
2 changed files with 40 additions and 1 deletions

View file

@ -123,7 +123,7 @@ async def main_async():
eval_start_time = time.time()
all_results = await evaluator.evaluate_datasets(dataset_configs)
print(f'Time taken to collect evaluation data: {time.time() - eval_start_time}')
print(f'Time taken to collect evaluation data: {time.time() - eval_start_time:.2f} seconds')
# Save results
output_file = os.path.join(
args.output_dir,

View file

@ -0,0 +1,39 @@
[
{
"dataset_name": "letter_counting",
"model": "google/gemini-2.0-flash-001",
"average_score": 0.059,
"total_examples": 10,
"timestamp": "2025-02-10T21:46:27.185026",
"config": {
"min_words": 5,
"max_words": 15,
"size": 10,
"seed": 42
}
},
{
"dataset_name": "propositional_logic",
"model": "google/gemini-2.0-flash-001",
"average_score": 0.059,
"total_examples": 10,
"timestamp": "2025-02-10T21:46:31.805110",
"config": {
"size": 10,
"seed": 42
}
},
{
"dataset_name": "leg_counting",
"model": "google/gemini-2.0-flash-001",
"average_score": 0.40199999999999997,
"total_examples": 10,
"timestamp": "2025-02-10T21:46:31.805665",
"config": {
"min_animals": 3,
"max_animals": 8,
"size": 10,
"seed": 42
}
}
]