feat: add minimum batch allocation support for environments

- Add min_batch_allocation parameter to ensure environments contribute minimum proportion to each batch
- Implement grab_batch_with_minimum_allocations function with proper scaling when allocations exceed 100%
- Add mixed-size group buffering to handle variable-sized data submissions
- Update server to use minimum allocation logic when any env has min_batch_allocation set
- Add comprehensive tests for minimum allocation scenarios
- Update documentation in API README and CONFIG.md
- Update example environments to demonstrate the feature

This feature lets critical environments guarantee that they contribute at least a specified proportion (0.0-1.0) of each training batch, ensuring important data sources are always represented during training.
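The allocation logic described above can be sketched as follows. This is a minimal illustration, not the actual `grab_batch_with_minimum_allocations` implementation; the helper names (`scale_allocations`, `allocate_batch`) and the flooring behavior are assumptions:

```python
def scale_allocations(min_allocations):
    """Scale requested minimums so they never sum above 1.0.

    min_allocations maps env name -> requested minimum proportion (0.0-1.0).
    If the requested minimums exceed 100% of the batch, each is scaled
    down proportionally so the total becomes exactly 1.0.
    """
    total = sum(min_allocations.values())
    if total <= 1.0:
        return dict(min_allocations)
    return {env: frac / total for env, frac in min_allocations.items()}


def allocate_batch(batch_size, min_allocations):
    """Compute guaranteed per-env sample counts for one training batch.

    Fractional counts are floored here (an assumption); the remaining
    capacity is left unassigned, to be filled from whatever data the
    server receives next.
    """
    scaled = scale_allocations(min_allocations)
    counts = {env: int(batch_size * frac) for env, frac in scaled.items()}
    remainder = batch_size - sum(counts.values())
    return counts, remainder
```

For example, `allocate_batch(1024, {"toolcall": 0.1, "math": 0.2})` reserves 102 and 204 slots respectively, leaving 718 to be filled normally; requesting minimums of 0.8 and 0.6 would be scaled down to sum to 1.0.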

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
Dakota 2025-07-07 08:50:28 -05:00
parent 4769eeb4a6
commit 08e14cc745
11 changed files with 1670 additions and 91 deletions


@@ -46,15 +46,17 @@ class SingleToolCallingEnv(BaseEnv):
             tokenizer_name="NousResearch/DeepHermes-3-Llama-3-8B-Preview",
             group_size=16,
             use_wandb=True,
             max_num_workers_per_node=16,
             rollout_server_url="http://localhost:8000",
             total_steps=2000,
             batch_size=1024,
-            steps_per_eval=20,
+            steps_per_eval=25,
             max_token_length=1024 * 16,
             inference_weight=1.0,
             wandb_name="toolcall_think",
             eval_handling=EvalHandlingEnum.LIMIT_TRAIN,
             eval_limit_ratio=0.1,
+            min_batch_allocation=0.1,
         )
         server_configs = [
             APIServerConfig(
@@ -113,7 +115,7 @@ class SingleToolCallingEnv(BaseEnv):
         full_dataset = full_dataset.shuffle(seed=42)
         # Create train/test split on the fly (e.g., 95% train, 5% test)
-        split_dataset = full_dataset.train_test_split(test_size=0.02, seed=42)
+        split_dataset = full_dataset.train_test_split(test_size=100, seed=42)
# Keep the splits as is - no need to reformat
self.train = split_dataset["train"]
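For concreteness, with the `batch_size=1024` and `min_batch_allocation=0.1` values from the config hunk above, the environment's per-batch guarantee works out as below (assuming the server floors the fractional count; the exact rounding is an implementation detail):

```python
batch_size = 1024
min_batch_allocation = 0.1

# Guaranteed number of sequences this env contributes to every batch,
# assuming the guarantee is computed by flooring the product.
guaranteed = int(batch_size * min_batch_allocation)
print(guaranteed)  # 102
```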