#!/bin/bash #SBATCH --job-name=grpo_multinode #SBATCH -D . #SBATCH --partition=TODO #SBATCH --account=TODO #SBATCH --output=output-%x.%j #SBATCH --error=error-%x.%j #SBATCH --nodes=2 # number of nodes #SBATCH --ntasks-per-node=1 # number of MP tasks #SBATCH --gres=gpu:2 # number of GPUs per node #SBATCH --cpus-per-task=8 # number of cores per tasks #SBATCH --mem=128G #SBATCH --time=48:00:00 # maximum execution time (HH:MM:SS) #SBATCH --comment "Key=Monitoring,Value=ON" #SBATCH --exclusive ###################### ### Set environment ## ###################### ulimit -s unlimited MAMBA_ENV="tina" eval "$(mamba shell hook --shell bash)" && mamba activate "${MAMBA_ENV}" echo "START TIME: $(date)" echo "PYTHON ENV: $(which python)" source "./scripts/set/set_vars.sh" export GPUS_PER_NODE=2 ###################### ###################### #### Set network ##### ###################### head_node_ip=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) ###################### export LAUNCHER="accelerate launch \ --num_processes $((SLURM_NNODES * GPUS_PER_NODE)) \ --num_machines $SLURM_NNODES \ --machine_rank $SLURM_NODEID \ --rdzv_backend c10d \ --main_process_ip $head_node_ip \ --main_process_port 29500 \ " PY_SCRIPT="./tina/post_train_hf/grpo.py" PY_CONFIG="./recipes/DeepSeek-R1-Distill-Qwen-1.5B/grpo/model_curated_deepscaler.yaml" # This step is necessary because accelerate launch does not handle multiline arguments properly export CMD="$LAUNCHER $PY_SCRIPT --config $PY_CONFIG" srun $CMD