mirror of
https://github.com/lilakk/BLEUBERI.git
synced 2026-04-19 12:58:12 +00:00
34 lines
1.2 KiB
Bash
34 lines
1.2 KiB
Bash
#!/bin/bash
#
# Example invocations of create_training_data.py that build GRPO and SFT
# training data from the yapeichang/BLEUBERI-Tulu3-50k dataset.
# The commands use relative paths, so run this script from the directory
# that contains create_training_data.py.

# Abort on the first failing command (also on unset variables and failed
# pipeline stages) so later steps never run after an earlier step has failed.
set -euo pipefail

# To use only the Tulu3 reference (the default setup in our main experiments):
# a single reference is selected with `--ref_models gold`, examples are
# scored with BLEU, and 5000 examples are requested.
python create_training_data.py grpo \
  --hf_dataset_path yapeichang/BLEUBERI-Tulu3-50k \
  --ref_models gold \
  --selection_mode hard \
  --model Qwen/Qwen2.5-7B \
  --metric bleu \
  --num_examples 5000

# To use 5 references: same settings as the single-reference setup, but
# `--ref_models` lists five reference sources (gold plus four model names).
python create_training_data.py grpo \
  --hf_dataset_path yapeichang/BLEUBERI-Tulu3-50k \
  --ref_models gold claude-3-7-sonnet@20250219 deepseek-chat-v3 gemini-2.5-pro-exp-03-25 o4-mini-2025-04-16 \
  --selection_mode hard \
  --model Qwen/Qwen2.5-7B \
  --metric bleu \
  --num_examples 5000

# To score the data using RM-8B instead of BLEU: `--metric rm` replaces
# `--metric bleu`. Note that no `--ref_models` argument is passed here.
python create_training_data.py grpo \
  --hf_dataset_path yapeichang/BLEUBERI-Tulu3-50k \
  --selection_mode hard \
  --model Qwen/Qwen2.5-7B \
  --metric rm \
  --num_examples 5000

# To create SFT data based on an existing GRPO training dataset.
# Each `--input_data_path` must point at a GRPO dataset directory that already
# exists; the names below match the 5-reference and single-reference BLEU
# configurations (presumably the outputs of the corresponding `grpo` commands
# in this file -- confirm against create_training_data.py).

# SFT data from the 5-reference GRPO dataset.
python create_training_data.py sft \
  --input_data_path ../data/data_grpo/BLEUBERI-Tulu3-50k_bleu_Qwen2.5-7B_5ref-gold-claude-deepseek-gemini-o4mini_hard_5000

# SFT data from the single-reference (gold) GRPO dataset.
python create_training_data.py sft \
  --input_data_path ../data/data_grpo/BLEUBERI-Tulu3-50k_bleu_Qwen2.5-7B_1ref-gold_hard_5000