BLEUBERI/training/create_training_data.sh
2025-06-04 20:36:43 +00:00

34 lines
1.2 KiB
Bash

#!/bin/bash
# Example invocations of create_training_data.py for building GRPO and SFT
# training data. The commands below run one after another, top to bottom;
# comment out the variants you do not need.
#
# Usage: bash create_training_data.sh

# Abort on the first failing command (and on unset variables / failed pipeline
# stages). The SFT steps at the bottom read datasets from ../data/data_grpo,
# so continuing after a failed step would only run them against missing or
# stale inputs.
set -euo pipefail

# create_training_data.py and ../data/data_grpo are given as relative paths.
# They are presumably meant to resolve against this script's own directory
# (verify if the script is ever relocated), so switch there first instead of
# depending on the caller's working directory.
cd -- "$(dirname -- "${BASH_SOURCE[0]}")"

# To only use the Tulu3 reference (default setup in our main experiments):
python create_training_data.py grpo \
  --hf_dataset_path yapeichang/BLEUBERI-Tulu3-50k \
  --ref_models gold \
  --selection_mode hard \
  --model Qwen/Qwen2.5-7B \
  --metric bleu \
  --num_examples 5000

# To use 5 references:
python create_training_data.py grpo \
  --hf_dataset_path yapeichang/BLEUBERI-Tulu3-50k \
  --ref_models gold claude-3-7-sonnet@20250219 deepseek-chat-v3 gemini-2.5-pro-exp-03-25 o4-mini-2025-04-16 \
  --selection_mode hard \
  --model Qwen/Qwen2.5-7B \
  --metric bleu \
  --num_examples 5000

# To score the data using RM-8B instead of BLEU.
# NOTE(review): --ref_models is not passed here, so whatever default
# create_training_data.py defines applies -- confirm that is intended.
python create_training_data.py grpo \
  --hf_dataset_path yapeichang/BLEUBERI-Tulu3-50k \
  --selection_mode hard \
  --model Qwen/Qwen2.5-7B \
  --metric rm \
  --num_examples 5000

# To create SFT data based on an existing GRPO training dataset.
# NOTE(review): the two input directories look like the outputs of the
# 5-reference and 1-reference BLEU runs above -- confirm against the naming
# scheme used by create_training_data.py.
python create_training_data.py sft \
  --input_data_path ../data/data_grpo/BLEUBERI-Tulu3-50k_bleu_Qwen2.5-7B_5ref-gold-claude-deepseek-gemini-o4mini_hard_5000

python create_training_data.py sft \
  --input_data_path ../data/data_grpo/BLEUBERI-Tulu3-50k_bleu_Qwen2.5-7B_1ref-gold_hard_5000