add minimal verifiers example (#472)

This commit is contained in:
Oliver Stanley 2025-06-20 16:31:02 +01:00 committed by GitHub
parent 9e79fc84b6
commit 49f3821098
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 97 additions and 0 deletions

View file

@ -0,0 +1,38 @@
## Setup
Prepare virtual environment, e.g.
```bash
python -m venv venv
source venv/bin/activate
```
Install dependencies
```bash
pip install -r requirements.txt
pip install flash-attn --no-build-isolation
```
Log in to W&B and Hugging Face if desired
```bash
wandb login
huggingface-cli login
```
## Training
Here we assume two GPUs, with one used for inference (vLLM) and the other for training (accelerate). You may need to adjust some settings for different GPU configs.
Run the vLLM server for inference:
```bash
CUDA_VISIBLE_DEVICES=0 vf-vllm --model Qwen/Qwen2.5-1.5B-Instruct --tensor-parallel-size 1
```
Run the training script using accelerate:
```bash
CUDA_VISIBLE_DEVICES=1 accelerate launch --config_file zero3.yaml --num_processes 1 vf_rg.py
```

View file

@ -0,0 +1 @@
verifiers[all]

View file

@ -0,0 +1,36 @@
"""Example training script for using the Reasoning Gym environment in verifiers.

Intended to be run via ``accelerate launch`` with a vf-vllm inference server
already serving the same model on another GPU (see the accompanying README).
"""
import verifiers as vf
from verifiers.envs.reasoninggym_env import ReasoningGymEnv

# Model to train; the same checkpoint should be served by vf-vllm for rollouts.
# (Plain string: the original used an f-string with no placeholders.)
model_name = "Qwen/Qwen2.5-1.5B-Instruct"
model, tokenizer = vf.get_model_and_tokenizer(model_name)

# Environment drawing tasks from three arithmetic-style reasoning-gym generators.
vf_env = ReasoningGymEnv(
    gym=[
        "basic_arithmetic",
        "bitwise_arithmetic",
        "decimal_arithmetic",
    ],
    num_samples=100,      # training sample count — presumably total across datasets; see ReasoningGymEnv
    num_eval_samples=50,  # held-out evaluation sample count
    max_concurrent=100,   # cap on concurrent environment requests
)

# GRPO hyperparameters, starting from the library defaults.
training_args = vf.grpo_defaults(run_name="reasoning-gym-test")
training_args.num_iterations = 1
training_args.per_device_train_batch_size = 4
training_args.num_generations = 8  # completions sampled per prompt for the GRPO group
training_args.gradient_accumulation_steps = 4
training_args.max_prompt_length = 1024
training_args.max_completion_length = 4096
training_args.max_steps = 100

trainer = vf.GRPOTrainer(
    model=model,
    processing_class=tokenizer,  # tokenizer passed as the HF Trainer processing class
    env=vf_env,
    args=training_args,
)
trainer.train()

View file

@ -0,0 +1,22 @@
compute_environment: LOCAL_MACHINE
debug: false
deepspeed_config:
deepspeed_multinode_launcher: standard
offload_optimizer_device: none
offload_param_device: none
zero3_init_flag: true
zero3_save_16bit_model: true
zero_stage: 3
distributed_type: DEEPSPEED
downcast_bf16: 'no'
machine_rank: 0
main_training_function: main
mixed_precision: bf16
num_machines: 1
num_processes: 1
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false