mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-19 12:57:58 +00:00
big update for letter counting
This commit is contained in:
parent
0524a71c76
commit
0bb38e79ef
3 changed files with 1536 additions and 2187 deletions
File diff suppressed because it is too large
Load diff
72
environments/letter_counting_environment/config.yaml
Normal file
72
environments/letter_counting_environment/config.yaml
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
---
# Tinker-Atropos Configuration - Letter Counting Environment
# This environment uses adaptive difficulty (curriculum learning) with 10 tiers
# Evaluation uses static dataset from HuggingFace: NousResearch/Letter-Counting-Eval

# Environment configuration
env:
  # Base environment config
  group_size: 8
  batch_size: 256
  max_batches_offpolicy: 3
  tokenizer_name: "Qwen/Qwen3-8B"
  use_wandb: true
  rollout_server_url: "http://localhost:8000"
  wandb_name: "letter-counting-env"
  ensure_scores_are_not_same: true
  max_token_length: 8192
  max_num_workers: 24
  worker_timeout: 3600  # 1 hour - needed for high difficulty levels
  total_steps: 5000
  steps_per_eval: 5
  inference_weight: 1.0
  data_path_to_save_groups: null
  eval_limit_ratio: 0.1

  # Generation configuration
  generation_temperature: 1.0
  eval_temperature: 0.6
  max_generation_tokens: 15360

  # Training filtering (CRITICAL for stable training):
  # - Groups with >80% success rate are SKIPPED (too easy, no learning signal)
  # - Groups with <20% success rate are SKIPPED (too hard, no learning signal)
  # - Groups with all identical scores are SKIPPED (no variance)
  difficulty_window_size: 150  # Number of recent groups to track (larger = more stable)
  difficulty_increase_threshold: 0.8  # Increase difficulty if success rate > this (also skip group)
  difficulty_decrease_threshold: 0.2  # Decrease difficulty if success rate < this (also skip group)
  min_difficulty_level: 1  # Minimum difficulty (1 = easiest)
  max_difficulty_level: 10  # Maximum difficulty (10 = 500 chars, 50 letters)
  starting_difficulty_level: 4  # Start at medium difficulty

  # Logging configuration
  debug_logging: true
  suppress_base_env_logs: true

  # Data dumping configuration (for creating offline training datasets)
  dump_rollouts: false
  dump_batch_size: 100

# OpenAI-compatible server configuration
openai:
  - model_name: "Qwen/Qwen3-8B"
    base_url: "http://localhost:8001/v1"
    api_key: "x"
    weight: 1.0
    num_requests_for_eval: 256

# Tinker-specific example configuration
tinker:
  lora_rank: 32
  learning_rate: 0.00004
  max_token_trainer_length: 16864
  checkpoint_dir: "./temp/"
  save_checkpoint_interval: 0

  # Wandb configuration for trainer
  wandb_project: "tinker-letter-counting"
  wandb_group: null
  wandb_run_name: "tinker-letter-counting-run"

# Standard Atropos flags
slurm: false
testing: false
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue