diff --git a/examples/veRL/README.md b/examples/veRL/README.md index e2ec6e50..5904cc8a 100644 --- a/examples/veRL/README.md +++ b/examples/veRL/README.md @@ -1,19 +1,32 @@ ### env setup ``` -conda create --name verl python=3.12 -y +conda create --name verl python=3.11 -y conda activate verl pip install flash-attn --no-build-isolation -pip install vllm==0.7.0 ray wandb +pip install ray wandb +# pip3 install vllm==0.7.0 +pip3 install vllm --pre --extra-index-url https://wheels.vllm.ai/nightly ``` +Regarding vllm>0.7 see: [docs](https://verl.readthedocs.io/en/latest/README_vllm0.7.html) + + ### clone and install veRL -tested with verl HEAD a65c9157bc0b85b64cd753de19f94e80a11bd871 +tested with verl HEAD 0dfcb7f99e299940e1792a386df13c7591df351a ``` git clone https://github.com/volcengine/verl.git cd verl pip install -e . ``` + + +Optionally log in to huggingface hub and wandb with your keys: + +``` +huggingface-cli login +wandb login +``` diff --git a/examples/veRL/config/ppo_trainer.yaml b/examples/veRL/config/ppo_trainer.yaml index b294a7cb..a3d167ea 100644 --- a/examples/veRL/config/ppo_trainer.yaml +++ b/examples/veRL/config/ppo_trainer.yaml @@ -45,7 +45,6 @@ actor_rollout_ref: # transformer_layer_cls_to_wrap: None min_num_params: 0 param_offload: False - grad_offload: False optimizer_offload: False fsdp_size: -1 ref: @@ -104,7 +103,6 @@ critic: use_remove_padding: False fsdp_config: param_offload: False - grad_offload: False optimizer_offload: False wrap_policy: # transformer_layer_cls_to_wrap: None @@ -158,10 +156,16 @@ trainer: project_name: verl_examples experiment_name: gsm8k logger: [ 'console', 'wandb' ] + val_generations_to_log_to_wandb: 0 nnodes: 1 n_gpus_per_node: 8 save_freq: -1 + # auto: find the last ckpt to resume. 
If none is found, start from scratch + resume_mode: auto # or disable, or resume_path if resume_from_path is set + resume_from_path: False test_freq: -1 critic_warmup: 0 - default_hdfs_dir: ~/experiments/gsm8k/ppo/${trainer.experiment_name} + default_hdfs_dir: null + remove_previous_ckpt_in_save: False + del_local_ckpt_after_load: False default_local_dir: checkpoints/${trainer.project_name}/${trainer.experiment_name}