Add BLEUBERI environment for reference-based RL

This commit is contained in:
Allan Niemerg 2025-06-08 18:02:33 -05:00
parent 3f6015e622
commit 5bb5bd2c3d
7 changed files with 948 additions and 0 deletions

View file

@ -0,0 +1,12 @@
"""
BLEUBERI: BLEU-based environment for instruction following.
This environment uses BLEU scores as a reward function for training
models to follow instructions. Based on the paper:
"BLEUBERI: BLEU is a surprisingly effective reward for instruction following"
https://arxiv.org/abs/2505.11080
"""
__all__ = ["BLEUBERIEnv"]
from .bleuberi_env import BLEUBERIEnv # noqa