mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-23 16:55:05 +00:00
54 lines
1.6 KiB
Bash
54 lines
1.6 KiB
Bash
#!/bin/bash
# Slurm batch script for a multi-node GRPO training run started through
# `accelerate launch`.
#
# The partition and account values are TODO placeholders and have to be filled
# in before the job is submitted.
#
# Keep every SBATCH directive ahead of the first executable command: sbatch
# stops reading directives as soon as it meets a non-comment line.
# Explanatory comments sit on their own lines so that each directive line
# contains nothing but the option itself.

#SBATCH --job-name=grpo_multinode
# Working directory of the job: the directory the job was submitted from.
#SBATCH -D .
#SBATCH --partition=TODO
#SBATCH --account=TODO
# Log file names: %x expands to the job name, %j to the job id.
#SBATCH --output=output-%x.%j
#SBATCH --error=error-%x.%j
# number of nodes
#SBATCH --nodes=2
# number of MP tasks (one launcher task per node)
#SBATCH --ntasks-per-node=1
# number of GPUs per node (keep in sync with GPUS_PER_NODE further down)
#SBATCH --gres=gpu:2
# number of cores per task
#SBATCH --cpus-per-task=8
#SBATCH --mem=128G
# maximum execution time (HH:MM:SS)
#SBATCH --time=48:00:00
#SBATCH --comment "Key=Monitoring,Value=ON"
#SBATCH --exclusive

######################
### Set environment ##
######################

# Remove the stack-size limit for this shell and the processes it starts.
ulimit -s unlimited

# Load mamba's shell integration, then activate the environment used by the job.
MAMBA_ENV="tina"
eval "$(mamba shell hook --shell bash)" && mamba activate "${MAMBA_ENV}"
echo "START TIME: $(date)"
# `command -v` is the shell builtin for resolving a command; unlike `which`
# it does not depend on an external program being installed.
echo "PYTHON ENV: $(command -v python)"

# Project variables are read from a file relative to the working directory.
# Stop here if it cannot be read, rather than starting a long job without
# the settings it defines.
set_vars_file="./scripts/set/set_vars.sh"
if [[ ! -r "${set_vars_file}" ]]; then
  echo "ERROR: cannot read ${set_vars_file} (cwd: $(pwd))" >&2
  exit 1
fi
# shellcheck source=/dev/null
source "${set_vars_file}"

# Processes started per node; must match the GPU count requested via --gres.
export GPUS_PER_NODE=2
######################

######################
#### Set network #####
######################

# Take the first host of the job's node list; it is passed to the launcher as
# the main process address. Despite the variable name, `scontrol show
# hostnames` prints host names, not IP addresses.
#
# The node list is quoted because its compressed form (for example
# "node[01-02]") contains glob characters that an unquoted expansion could
# match against files in the working directory.
head_node_ip=$(scontrol show hostnames "${SLURM_JOB_NODELIST}" | head -n 1)
if [[ -z "${head_node_ip}" ]]; then
  echo "ERROR: could not determine the head node from SLURM_JOB_NODELIST" >&2
  exit 1
fi
######################

# Abort early when a value the launcher is built from is missing; otherwise
# the arithmetic below silently evaluates to 0 and options end up without
# arguments.
: "${SLURM_NNODES:?SLURM_NNODES is not set - run this script through sbatch}"
: "${GPUS_PER_NODE:?GPUS_PER_NODE is not set}"
: "${head_node_ip:?head node address is empty}"

# accelerate launcher, assembled as a single-line string.
#
# \$SLURM_NODEID is escaped on purpose. The batch script itself runs only on
# the first node of the allocation, so expanding the variable here would give
# every node the same machine rank. Left unexpanded, it is resolved later by
# the per-node shell that srun starts, where it holds that node's own index.
export LAUNCHER="accelerate launch \
  --num_processes $((SLURM_NNODES * GPUS_PER_NODE)) \
  --num_machines ${SLURM_NNODES} \
  --machine_rank \$SLURM_NODEID \
  --rdzv_backend c10d \
  --main_process_ip ${head_node_ip} \
  --main_process_port 29500 \
  "

# Training entry point and its recipe.
PY_SCRIPT="./tina/post_train_hf/grpo.py"
PY_CONFIG="./recipes/DeepSeek-R1-Distill-Qwen-1.5B/grpo/model_curated_deepscaler.yaml"

# Join launcher, script and arguments into one command line, because
# accelerate launch does not handle multiline arguments properly.
export CMD="${LAUNCHER} ${PY_SCRIPT} --config ${PY_CONFIG}"

# srun starts one task per node (see --ntasks-per-node). Each task runs the
# command through its own bash so that \$SLURM_NODEID above is expanded in the
# task's environment instead of the batch shell's.
srun bash -c "${CMD}"