Add BLEUBERI environment for reference-based RL

2026-05-01 17:45:16 +00:00 · 2025-06-08 18:02:33 -05:00 · 2025-06-08 18:02:33 -05:00 · 5bb5bd2c3d
commit 5bb5bd2c3d
parent 3f6015e622
7 changed files with 948 additions and 0 deletions
--- a/environments/bleuberi/__init__.py
+++ b/environments/bleuberi/__init__.py
@@ -0,0 +1,12 @@
+"""
+BLEUBERI: BLEU-based environment for instruction following.
+
+This environment uses BLEU scores as a reward function for training
+models to follow instructions. Based on the paper:
+"BLEUBERI: BLEU is a surprisingly effective reward for instruction following"
+https://arxiv.org/abs/2505.11080
+"""
+
+__all__ = ["BLEUBERIEnv"]
+
+from .bleuberi_env import BLEUBERIEnv  # noqa