mirror of
https://github.com/lilakk/BLEUBERI.git
synced 2026-04-19 12:58:12 +00:00
49 lines
No EOL
1.1 KiB
Bash
49 lines
No EOL
1.1 KiB
Bash
#!/bin/bash
#
# Runs the WildBench evaluation for one or more models, then prints the
# score-only leaderboard.
#
# Usage:
#   bash <this-script> [--model MODEL]
#
#   --model MODEL  Evaluate only MODEL instead of the default list below.
#                  An empty MODEL keeps the default list.
#
# For every model the script invokes, relative to the current directory:
#   scripts/_common_vllm.sh <model> <model_short> 1
#   evaluation/run_eval_v2_instant.score.sh <model_short>
# and, once all models are processed:
#   leaderboard/show_eval.sh score_only
# It must therefore be launched from a directory where those relative paths
# resolve.
#
# Exported environment:
#   VLLM_WORKER_MULTIPROC_METHOD=spawn
#   GPT_EVAL_NAME  - presumably read by the scoring script; verify there.
#
# Exit status: 0 when argument parsing, every per-model step and the
# leaderboard step succeeded; non-zero otherwise.

export VLLM_WORKER_MULTIPROC_METHOD=spawn

export GPT_EVAL_NAME="gpt-4.1-mini" # TODO: change accordingly!

# Models evaluated when --model is not given (or is given an empty value).
models=(
  "meta-llama/Llama-3.1-8B-Instruct"
)

#######################################
# Parses the command line.
# Globals:   models (replaced by a one-element list when --model is non-empty)
# Arguments: the script's positional parameters
# Outputs:   diagnostics on stderr
# Returns:   0 on success, 1 on an unknown parameter or a missing value
#######################################
parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      --model)
        # A trailing "--model" with nothing after it used to be silently
        # ignored; report it instead of evaluating an unintended model.
        if (( $# < 2 )); then
          printf '%s\n' "Missing value for parameter: $1" >&2
          return 1
        fi
        if [[ -n "$2" ]]; then
          models=("$2")
        fi
        shift 2
        ;;
      *)
        printf '%s\n' "Unknown parameter: $1" >&2
        return 1
        ;;
    esac
  done
}

#######################################
# Derives the short name passed to the generation and scoring scripts.
# Paths containing "ckpts" keep their last two components joined by "_"
# (e.g. out/ckpts/run1/step100 -> run1_step100); anything else is reduced to
# its last component (e.g. meta-llama/Llama-3.1-8B-Instruct ->
# Llama-3.1-8B-Instruct).
# Arguments: $1 - model identifier or checkpoint path
# Outputs:   the short name on stdout
#######################################
model_short_name() {
  local path=$1
  local leaf parent

  # Drop trailing slashes so "dir/" and "dir" yield the same name.
  while [[ "$path" == ?*/ ]]; do
    path=${path%/}
  done

  leaf=${path##*/}
  if [[ "$path" == *ckpts* && "$path" == */* ]]; then
    parent=${path%/*}
    parent=${parent##*/}
    printf '%s\n' "${parent}_${leaf}"
  else
    printf '%s\n' "$leaf"
  fi
}

#######################################
# Evaluates every selected model and shows the leaderboard.
# A model whose generation or scoring step fails is reported on stderr and
# skipped; the remaining models are still processed.
# NOTE(review): this assumes both helper scripts exit 0 on success — confirm.
# Globals:   models (read, via parse_args also written)
# Arguments: the script's positional parameters
# Returns:   0 if everything succeeded, 1 otherwise
#######################################
main() {
  parse_args "$@" || return 1

  local model model_short
  local failed=0

  for model in "${models[@]}"; do
    echo "====================================================="
    echo "Starting WildBench evaluation for model: $model"
    echo "====================================================="

    model_short=$(model_short_name "$model")

    # NOTE(review): the meaning of the trailing "1" is defined by
    # scripts/_common_vllm.sh, which is not visible here.
    if ! bash scripts/_common_vllm.sh "$model" "$model_short" 1; then
      printf '%s\n' "WildBench generation failed for: $model" >&2
      failed=1
      continue
    fi

    if ! bash evaluation/run_eval_v2_instant.score.sh "$model_short"; then
      printf '%s\n' "WildBench scoring failed for: $model" >&2
      failed=1
      continue
    fi

    echo "WildBench evaluation completed for: $model"
    echo "====================================================="
    echo ""
  done

  bash leaderboard/show_eval.sh score_only || failed=1

  return "$failed"
}

main "$@"