atropos/environments/bleuberi/__init__.py
2025-09-08 11:21:27 -05:00

12 lines
373 B
Python

"""
BLEUBERI: BLEU-based environment for instruction following.
This environment uses BLEU scores as a reward function for training
models to follow instructions. Based on the paper:
"BLEUBERI: BLEU is a surprisingly effective reward for instruction following"
https://arxiv.org/abs/2505.11080
"""
__all__ = ["BLEUBERIEnv"]
from .bleuberi_env import BLEUBERIEnv # noqa