mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-19 12:57:58 +00:00
57 lines
2.4 KiB
Python
57 lines
2.4 KiB
Python
"""Display utilities for formatting evaluation results and tables."""
|
|
|
|
from typing import Dict
|
|
|
|
|
|
def display_metrics_table(
|
|
task_name: str, metrics: Dict, start_time: float, end_time: float
|
|
) -> None:
|
|
"""
|
|
Display evaluation results in a formatted markdown table.
|
|
|
|
Args:
|
|
task_name: Name of the evaluation task
|
|
metrics: Dictionary of metric names to values
|
|
start_time: Start time of evaluation (unix timestamp)
|
|
end_time: End time of evaluation (unix timestamp)
|
|
"""
|
|
print(f"\nEvaluation Results: {task_name}")
|
|
|
|
# Prepare data for column width calculation
|
|
clean_metric_names = [
|
|
metric_name.replace("eval/", "").replace("_", " ")
|
|
for metric_name in metrics.keys()
|
|
]
|
|
formatted_values = [f"{value:.4f}" for value in metrics.values()]
|
|
|
|
# Calculate dynamic column widths
|
|
col_groups = max(len(task_name), len("Groups"))
|
|
col_version = max(len("1"), len("Version"))
|
|
col_filter = max(len("none"), len("Filter"))
|
|
col_nshot = max(len(""), len("n-shot"))
|
|
col_metric = max(max(len(name) for name in clean_metric_names), len("Metric"))
|
|
col_dir = 3 # Fixed for directional indicators
|
|
col_value = max(max(len(val) for val in formatted_values), len("Value"))
|
|
col_pm = 3 # Fixed for "±" symbol
|
|
col_stderr = max(len("0.0000"), len("Stderr"))
|
|
|
|
# Header
|
|
print(
|
|
f"|{'Groups':<{col_groups}}|{'Version':<{col_version}}|{'Filter':<{col_filter}}|{'n-shot':<{col_nshot}}|{'Metric':<{col_metric}}|{'':^{col_dir}}|{'Value':>{col_value}}|{'':^{col_pm}}|{'Stderr':>{col_stderr}}|" # noqa: E501
|
|
)
|
|
|
|
# Separator
|
|
print(
|
|
f"|{'-' * col_groups}|{'-' * col_version}|{'-' * col_filter}|{'-' * col_nshot}|{'-' * col_metric}|{'-' * col_dir}|{'-' * (col_value-1)}:|{'-' * col_pm}|{'-' * (col_stderr-1)}:|" # noqa: E501
|
|
)
|
|
|
|
# Data rows
|
|
for metric_name, metric_value in metrics.items():
|
|
clean_metric_name = metric_name.replace("eval/", "").replace("_", " ")
|
|
direction = "↑" if "correct" in metric_name or "acc" in metric_name else ""
|
|
|
|
print(
|
|
f"|{task_name:<{col_groups}}|{1:<{col_version}}|{'none':<{col_filter}}|{'':<{col_nshot}}|{clean_metric_name:<{col_metric}}|{direction:^{col_dir}}|{metric_value:>{col_value}.4f}|{'±':^{col_pm}}|{'0.0000':>{col_stderr}}|" # noqa: E501
|
|
)
|
|
|
|
print(f"Evaluation completed in {end_time - start_time:.2f} seconds")
|