mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-19 12:57:58 +00:00
big update for letter counting
This commit is contained in:
parent
0524a71c76
commit
0bb38e79ef
3 changed files with 1536 additions and 2187 deletions
File diff suppressed because it is too large
Load diff
72
environments/letter_counting_environment/config.yaml
Normal file
72
environments/letter_counting_environment/config.yaml
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
---
# Tinker-Atropos Configuration - Letter Counting Environment
# This environment uses adaptive difficulty (curriculum learning) with 10 tiers
# Evaluation uses static dataset from HuggingFace: NousResearch/Letter-Counting-Eval

# Environment configuration
env:
  # Base environment config
  group_size: 8
  batch_size: 256
  max_batches_offpolicy: 3
  tokenizer_name: "Qwen/Qwen3-8B"
  use_wandb: true
  rollout_server_url: "http://localhost:8000"
  wandb_name: "letter-counting-env"
  ensure_scores_are_not_same: true
  max_token_length: 8192
  max_num_workers: 24
  worker_timeout: 3600  # 1 hour - needed for high difficulty levels
  total_steps: 5000
  steps_per_eval: 5
  inference_weight: 1.0
  data_path_to_save_groups: null
  eval_limit_ratio: 0.1

  # Generation configuration
  generation_temperature: 1.0
  eval_temperature: 0.6
  max_generation_tokens: 15360

  # Training filtering (CRITICAL for stable training):
  # - Groups with >80% success rate are SKIPPED (too easy, no learning signal)
  # - Groups with <20% success rate are SKIPPED (too hard, no learning signal)
  # - Groups with all identical scores are SKIPPED (no variance)
  difficulty_window_size: 150  # Number of recent groups to track (larger = more stable)
  difficulty_increase_threshold: 0.8  # Increase difficulty if success rate > this (also skip group)
  difficulty_decrease_threshold: 0.2  # Decrease difficulty if success rate < this (also skip group)
  min_difficulty_level: 1  # Minimum difficulty (1 = easiest)
  max_difficulty_level: 10  # Maximum difficulty (10 = 500 chars, 50 letters)
  starting_difficulty_level: 4  # Start at medium difficulty

  # Logging configuration
  debug_logging: true
  suppress_base_env_logs: true

  # Data dumping configuration (for creating offline training datasets)
  dump_rollouts: false
  dump_batch_size: 100

# OpenAI-compatible server configuration
openai:
  - model_name: "Qwen/Qwen3-8B"
    base_url: "http://localhost:8001/v1"
    api_key: "x"
    weight: 1.0
    num_requests_for_eval: 256

# Tinker-specific example configuration
tinker:
  lora_rank: 32
  learning_rate: 0.00004
  max_token_trainer_length: 16864
  checkpoint_dir: "./temp/"
  save_checkpoint_interval: 0

  # Wandb configuration for trainer
  wandb_project: "tinker-letter-counting"
  wandb_group: null
  wandb_run_name: "tinker-letter-counting-run"

# Standard Atropos flags
slurm: false
testing: false
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue