mirror of
https://github.com/lilakk/BLEUBERI.git
synced 2026-04-25 17:10:55 +00:00
46 lines
1.7 KiB
Bash
46 lines
1.7 KiB
Bash
# Build the leaderboard tables, merge the results, run the Elo computation
# and print the resulting table.
#
# Usage: bash <this-script> [MODE]
#   MODE  "score_only" skips the pairwise table jobs and prints the
#         task-wise score table at the end; any other value (or no argument)
#         also builds the pairwise tables and prints the main table.
#
# Environment:
#   GPT_EVAL_NAME  passed to the score table step as --gpt_eval_name
#                  (default: gpt-4.1-mini).
#
# Every step is best-effort: a failing step does not prevent the later ones
# from running, and the exit status is that of the final show_table.py call.

MODE=${1:-}

if [ -z "${GPT_EVAL_NAME:-}" ]; then
  # Diagnostics go to stderr so they never mix with table output on stdout.
  echo "Warning: GPT_EVAL_NAME not set. Using default: gpt-4.1-mini" >&2
fi
gpt_eval_name=${GPT_EVAL_NAME:-"gpt-4.1-mini"}

# Pairwise tables are built unless MODE is "score_only".
if [ "$MODE" != "score_only" ]; then
  # One background job per (second argument, reference) pair, launched in the
  # order: -1, 500, 1000, 1500, each with gpt4t, llama, haiku.
  # NOTE(review): the meaning of the second positional argument is defined by
  # _create_tables.py, which is not visible here.
  pids=""
  for table_arg in -1 500 1000 1500; do
    for ref in gpt4t llama haiku; do
      python leaderboard/data_dir/_create_tables.py "pairwise-$ref" "$table_arg" &
      pids="$pids $!"
    done
  done

  # Wait on each job individually: a bare `wait` always returns 0 and would
  # hide failures. Failures are only reported, not fatal.
  failed=0
  # $pids is intentionally unquoted: it is a space-separated list of PIDs.
  for pid in $pids; do
    wait "$pid" || failed=$((failed + 1))
  done
  if [ "$failed" -gt 0 ]; then
    echo "Warning: $failed pairwise table job(s) exited with a non-zero status" >&2
  fi
fi

# Score only
python leaderboard/data_dir/_create_tables.py score --gpt_eval_name "$gpt_eval_name"

python leaderboard/data_dir/_merge_results.py

# Elo settings. Previous configuration, kept for reference:
# margin=3;tie_margin=2;K=4;dynamic=True;interval=16
margin=2
tie_margin=2
K=4
dynamic=True
interval=100
LM=500
python -m leaderboard.wb_elo \
  --K "$K" --margin "$margin" --tie_margin "$tie_margin" \
  --num_rounds 128 --dynamic "$dynamic" --interval "$interval" \
  --num_processes 4 --length_margin "$LM"

# Merge again after the Elo step has run.
python leaderboard/data_dir/_merge_results.py

# `=` (not `==`) keeps the test valid for a POSIX `[` as well as for bash.
if [ "$MODE" = "score_only" ]; then
  python leaderboard/show_table.py --mode taskwise_score
else
  python leaderboard/show_table.py --mode main
fi