#!/bin/bash
#
# Example invocations of create_training_data.py for building GRPO and SFT
# training data from the yapeichang/BLEUBERI-Tulu3-50k dataset.
#
# Every command refers to create_training_data.py and ../data/... by relative
# path, so run this script from the directory that contains
# create_training_data.py.
#
# The commands run top to bottom. Comment out the ones you do not need.

# Stop at the first failing command (and on unset variables) so that the SFT
# steps at the bottom never run against missing or partial GRPO data.
set -euo pipefail

# To only use the Tulu3 reference (default setup in our main experiments):
python create_training_data.py grpo \
  --hf_dataset_path yapeichang/BLEUBERI-Tulu3-50k \
  --ref_models gold \
  --selection_mode hard \
  --model Qwen/Qwen2.5-7B \
  --metric bleu \
  --num_examples 5000

# To use 5 references:
python create_training_data.py grpo \
  --hf_dataset_path yapeichang/BLEUBERI-Tulu3-50k \
  --ref_models gold claude-3-7-sonnet@20250219 deepseek-chat-v3 gemini-2.5-pro-exp-03-25 o4-mini-2025-04-16 \
  --selection_mode hard \
  --model Qwen/Qwen2.5-7B \
  --metric bleu \
  --num_examples 5000

# To score the data using RM-8B instead of BLEU:
python create_training_data.py grpo \
  --hf_dataset_path yapeichang/BLEUBERI-Tulu3-50k \
  --selection_mode hard \
  --model Qwen/Qwen2.5-7B \
  --metric rm \
  --num_examples 5000

# To create SFT data based on an existing GRPO training dataset.
# NOTE(review): these input directories are presumably the outputs of the
# 5-reference and 1-reference BLEU runs above -- confirm that the names match
# what create_training_data.py actually writes.
python create_training_data.py sft \
  --input_data_path ../data/data_grpo/BLEUBERI-Tulu3-50k_bleu_Qwen2.5-7B_5ref-gold-claude-deepseek-gemini-o4mini_hard_5000

python create_training_data.py sft \
  --input_data_path ../data/data_grpo/BLEUBERI-Tulu3-50k_bleu_Qwen2.5-7B_1ref-gold_hard_5000