This commit is contained in:
hjc-puro 2025-07-11 09:52:19 +00:00
parent 72210cf4ad
commit 6e9baaf9d8
2 changed files with 2 additions and 9 deletions

View file

@@ -15,9 +15,7 @@ def display_metrics_table(
start_time: Start time of evaluation (unix timestamp) start_time: Start time of evaluation (unix timestamp)
end_time: End time of evaluation (unix timestamp) end_time: End time of evaluation (unix timestamp)
""" """
print("\n" + "=" * 84) print(f"\nEvaluation Results: {task_name}")
print(f"Evaluation Results: {task_name}")
print("=" * 84)
# Column widths # Column widths
col_groups = 20 col_groups = 20
@@ -49,6 +47,4 @@ def display_metrics_table(
f"|{task_name:<{col_groups}}|{1:<{col_version}}|{'none':<{col_filter}}|{'':<{col_nshot}}|{clean_metric_name:<{col_metric}}|{direction:<{col_dir}}|{metric_value:>{col_value}.4f}|{'±':<{col_pm}}|{'0.0000':>{col_stderr}}|" # noqa: E501 f"|{task_name:<{col_groups}}|{1:<{col_version}}|{'none':<{col_filter}}|{'':<{col_nshot}}|{clean_metric_name:<{col_metric}}|{direction:<{col_dir}}|{metric_value:>{col_value}.4f}|{'±':<{col_pm}}|{'0.0000':>{col_stderr}}|" # noqa: E501
) )
print("=" * 84) print(f"Evaluation completed in {end_time - start_time:.2f} seconds\n")
print(f"Evaluation completed in {end_time - start_time:.2f} seconds")
print("=" * 84 + "\n")

View file

@@ -211,8 +211,6 @@ class GSM8kEnv(BaseEnv):
# Log evaluation results # Log evaluation results
eval_metrics = { eval_metrics = {
"eval/percent_correct": percent_correct, "eval/percent_correct": percent_correct,
"eval/total_samples": len(scores),
"eval/correct_samples": sum(scores),
} }
await self.evaluate_log( await self.evaluate_log(
@@ -223,7 +221,6 @@ class GSM8kEnv(BaseEnv):
generation_parameters={ generation_parameters={
"temperature": 0.0, "temperature": 0.0,
"max_tokens": self.config.max_token_length, "max_tokens": self.config.max_token_length,
"split": "eval",
}, },
) )