Fix environment issues. Safely ran python3 accessibility_env.py --help

This commit is contained in:
Josh 2025-05-18 13:07:08 -07:00
parent 737139994a
commit 659247fc00

View file

@@ -1,11 +1,17 @@
(NOTE: the side-by-side diff columns were fused during extraction; below is the reconstructed NEW version of this hunk.)

# environments/hack0/accessibility_env/accessibility_env.py
import os  # For API keys, etc.
from typing import List, Optional, Tuple  # Common type hints, added Dict

from atroposlib.envs.base import (
    APIServerConfig,
    BaseEnv,
    BaseEnvConfig,
    ScoredDataGroup,
)
from atroposlib.type_definitions import (  # GameHistory might not be needed yet, Item is common
    Item,
)
from atroposlib.utils.tokenize_for_trainer import tokenize_for_trainer
@@ -30,23 +36,24 @@ class AccessibilityEnv(BaseEnv):
(NOTE: the side-by-side diff columns were fused during extraction; below is the reconstructed NEW version of this hunk.)

    @classmethod
    def config_init(cls) -> Tuple[AccessibilityEnvConfig, List[APIServerConfig]]:
        env_config = AccessibilityEnvConfig(
            tokenizer_name="NousResearch/Llama-3-8B-Instruct- যেভাবে-তুমি-বাংলা-বলো",  # Placeholder
            group_size=2,  # Smaller for faster testing initially
            use_wandb=True,
            rollout_server_url="http://localhost:8000",
            total_steps=10,  # For process mode, number of items to generate
            batch_size=4,  # Max items in a single call to score (related to group_size)
            steps_per_eval=5,
            max_token_length=2048,
            wandb_name="accessibility_env_hackathon_dev",  # Dev run name
            # data_path_to_save_groups="accessibility_rollouts.jsonl"  # Often set via CLI for process
        )
        server_configs = [
            APIServerConfig(
                model_name="gpt-3.5-turbo",  # Or your preferred model
                # base_url=None,  # Defaults to OpenAI if None
                api_key=os.environ.get(
                    "OPENAI_API_KEY", "YOUR_API_KEY_PLACEHOLDER_IF_NOT_SET"
                ),  # Important!
                num_requests_for_eval=16,
            ),
        ]
        return env_config, server_configs