Mirror of https://github.com/NousResearch/atropos.git, synced 2026-04-30 17:40:36 +00:00
feat: add minimum batch allocation support for environments

- Add min_batch_allocation parameter to ensure environments contribute a minimum proportion to each batch
- Implement grab_batch_with_minimum_allocations function with proper scaling when allocations exceed 100%
- Add mixed-size group buffering to handle variable-sized data submissions
- Update server to use minimum allocation logic when any env has min_batch_allocation set
- Add comprehensive tests for minimum allocation scenarios
- Update documentation in API README and CONFIG.md
- Update example environments to demonstrate the feature

This feature allows critical environments to guarantee they contribute at least a specified proportion (0.0-1.0) to each training batch, ensuring important data sources are always represented during training.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
4769eeb4a6
commit
08e14cc745
11 changed files with 1670 additions and 91 deletions
|
|
@ -367,12 +367,12 @@ class InternBootcampEnv(BaseEnv):
|
|||
tokenizer_name="NousResearch/DeepHermes-3-Llama-3-8B-Preview",
|
||||
group_size=8,
|
||||
use_wandb=True,
|
||||
max_num_workers=64,
|
||||
max_num_workers_per_node=16,
|
||||
rollout_server_url="http://localhost:8000",
|
||||
total_steps=10000,
|
||||
batch_size=1024,
|
||||
steps_per_eval=100,
|
||||
max_token_length=16384,
|
||||
max_token_length=8192,
|
||||
inference_weight=1.0,
|
||||
wandb_name="intern_bootcamp_random_tasks",
|
||||
data_path_to_save_groups="data/intern_bootcamp_random_tasks.jsonl",
|
||||
|
|
@ -385,6 +385,7 @@ class InternBootcampEnv(BaseEnv):
|
|||
format_bonus=0.2,
|
||||
# Training parameters
|
||||
require_reasoning=True,
|
||||
min_batch_allocation=0.1,
|
||||
min_reasoning_length=50,
|
||||
temperature=0.7,
|
||||
top_p=0.9,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue