BLEUBERI/eval/WildBench/run_wildbench_instant.sh
2025-06-04 20:36:43 +00:00

49 lines
No EOL
1.1 KiB
Bash

#!/bin/bash
model_arg=""
while [[ "$#" -gt 0 ]]; do
case $1 in
--model)
model_arg="$2"
shift
;;
*)
echo "Unknown parameter: $1"
exit 1
;;
esac
shift
done
models=(
"meta-llama/Llama-3.1-8B-Instruct"
)
export VLLM_WORKER_MULTIPROC_METHOD=spawn
export GPT_EVAL_NAME="gpt-4.1-mini" # TODO: change accordingly!
if [[ -n "$model_arg" ]]; then
models=("$model_arg")
fi
for model in "${models[@]}"; do
echo "====================================================="
echo "Starting WildBench evaluation for model: $model"
echo "====================================================="
if [[ "$model" == *"ckpts"* ]]; then
model_short=$(echo $model | rev | cut -d/ -f1,2 | rev | tr '/' '_')
else
model_short=$(basename $model)
fi
bash scripts/_common_vllm.sh $model $model_short 1
bash evaluation/run_eval_v2_instant.score.sh $model_short
echo "WildBench evaluation completed for: $model"
echo "====================================================="
echo ""
done
bash leaderboard/show_eval.sh score_only