diff --git a/example_trainer/data.py b/example_trainer/data.py
index bb3ebbdb..16a38564 100644
--- a/example_trainer/data.py
+++ b/example_trainer/data.py
@@ -5,7 +5,7 @@ Handles data retrieval from Atropos API, padding, batching,
 and advantage normalization.
 
 Also extracts inference logprobs for proper GRPO loss computation:
-- Inference logprobs serve as π_old (reference policy) for importance sampling
+- Inference (rollout) logprobs are used in the importance-ratio computation
 - They are batched and padded to align token-by-token with training labels
 """
 
diff --git a/example_trainer/training.py b/example_trainer/training.py
index 92e18f2e..035d45c7 100644
--- a/example_trainer/training.py
+++ b/example_trainer/training.py
@@ -287,7 +287,7 @@ def run_training_step(
         temperature_batches: List of temperature tensors
         config: Training configuration (includes clip_eps, warmup_steps)
         step_idx: Current global training step (0-based)
-        inference_logprob_batches: Batched logprobs from inference (π_old), aligned with labels
+        inference_logprob_batches: Batched rollout logprobs from inference, aligned with labels
 
     Returns:
         Dict of training metrics for this step