diff --git a/training/README.md b/training/README.md index be5b1485..52eaa123 100644 --- a/training/README.md +++ b/training/README.md @@ -12,8 +12,7 @@ NOTE: There seem to be some hanging issues with verl on older-gen hardware. Test ```bash git clone https://github.com/open-thought/reasoning-gym.git -cd reasoning-gym -pip install -e . +pip install -e ./reasoning-gym ``` 3. Install training dependencies (tested with verl @ f9dae2bb118f9fec36aaf99953dee77db6881052): @@ -22,8 +21,7 @@ pip install -e . pip install wheel "torch==2.6.0" torchvision pip install flash-attn --no-build-isolation git clone https://github.com/volcengine/verl.git -cd verl -pip install -e .[vllm] +pip install -e ./verl[vllm] ``` 4. Log in to HF and W&B: diff --git a/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml b/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml index 017912d1..0a32a24c 100644 --- a/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml +++ b/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml @@ -122,6 +122,7 @@ actor_rollout_ref: ulysses_sequence_parallel_size: ${actor_rollout_ref.actor.ulysses_sequence_parallel_size} # sp size rollout: name: vllm + mode: sync temperature: 1.0 top_k: -1 # 0 for hf rollout, -1 for vllm rollout top_p: 1