diff --git a/training/configs/qwen2.5_3b_grpo.yaml b/training/configs/qwen2.5_3b_grpo.yaml index b4a9bac1..14b4c852 100644 --- a/training/configs/qwen2.5_3b_grpo.yaml +++ b/training/configs/qwen2.5_3b_grpo.yaml @@ -37,7 +37,7 @@ data: val_files: test.parquet prompt_key: prompt max_prompt_length: 512 - max_response_length: 1024 + max_response_length: 4096 train_batch_size: 16 val_batch_size: 16 return_raw_input_ids: True # This should be set to true when the tokenizer between policy and rm differs @@ -57,7 +57,7 @@ actor_rollout_ref: ppo_micro_batch_size: null # will be deprecated, use ppo_micro_batch_size_per_gpu ppo_micro_batch_size_per_gpu: 8 use_dynamic_bsz: False - ppo_max_token_len_per_gpu: 12288 # n * ${data.max_prompt_length} + ${data.max_response_length} + ppo_max_token_len_per_gpu: 36864 # n * (${data.max_prompt_length} + ${data.max_response_length}); here n=8: 8 * (512 + 4096) = 36864 grad_clip: 1.0 clip_ratio: 0.2 entropy_coeff: 0.001