mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-19 12:57:58 +00:00
- Moved environments/bleuberi to environments/community/bleuberi
- Updated .gitmodules to reflect new submodule path
- Fixed pre-commit formatting issues
- Cleaned up test output files
12 lines
373 B
Python
12 lines
373 B
Python
"""
|
|
BLEUBERI: BLEU-based environment for instruction following.
|
|
|
|
This environment uses BLEU scores as a reward function for training
|
|
models to follow instructions. Based on the paper:
|
|
"BLEUBERI: BLEU is a surprisingly effective reward for instruction following"
|
|
https://arxiv.org/abs/2505.11080
|
|
"""
|
|
|
|
__all__ = ["BLEUBERIEnv"]
|
|
|
|
from .bleuberi_env import BLEUBERIEnv # noqa
|