Rework server and eval for RL rollout. Add in asyncmanagedserver for verifiers
2026-04-22 16:48:57 +00:00 · 2026-01-10 14:55:08 +05:30 · 2026-01-10 14:55:08 +05:30 · cf636595d2
commit cf636595d2
parent 3449a4c23d
4 changed files with 652 additions and 91 deletions
--- a/environments/configs/verifiers.yaml
+++ b/environments/configs/verifiers.yaml
@ -0,0 +1,31 @@
+# Verifiers environment configuration
+# Usage: python environments/verifiers_server.py serve --config environments/configs/verifiers.yaml
+#
+# For SFT data generation with external API:
+#   python environments/verifiers_server.py process \
+#       --env.vf_env_name primeintellect/gsm8k \
+#       --env.data_path_to_save_groups output.jsonl \
+#       --openai.base_url https://api.openai.com/v1 \
+#       --openai.api_key $OPENAI_API_KEY \
+#       --openai.model_name gpt-4o
+
+env:
+  vf_env_name: "primeintellect/gsm8k"  # Prime Env Hub environment
+  env_args: {}  # empty mapping: no extra environment arguments are set here
+  group_size: 8
+  max_token_length: 2048
+  tokenizer_name: "Qwen/Qwen2.5-1.5B-Instruct"  # same model id as the openai entry's model_name below
+  rollout_server_url: "http://localhost:8000"  # local endpoint; note the port differs from openai base_url (9001)
+  use_wandb: true
+  wandb_name: "verifiers"
+  total_steps: 1000
+  batch_size: 4
+  steps_per_eval: 100
+
+openai:  # a sequence; a single entry is configured here
+  - model_name: "Qwen/Qwen2.5-1.5B-Instruct"
+    base_url: "http://localhost:9001/v1"  # local endpoint; the usage header shows --openai.base_url pointing at an external API instead
+    api_key: "x"  # NOTE(review): looks like a placeholder for the local endpoint — confirm; do not commit a real key here
+
+slurm: false
+testing: false