move best-of-n selection to util

This commit is contained in:
Shannon Sands 2025-05-14 10:35:12 -07:00
parent 4c00e2b209
commit 21cc528b85
3 changed files with 102 additions and 17 deletions

View file

@@ -16,6 +16,7 @@ from .advantages import (
compute_discounted_returns,
compute_grpo_process_supervision_advantages,
)
from .best_of_n_selection import select_best_index
__all__ = [
"ConfigHandler",
@@ -28,4 +29,5 @@ __all__ = [
"compute_discounted_returns",
"compute_grpo_process_supervision_advantages",
"ensure_trajectory_token_limit",
"select_best_index",
]