---
# Examples of api config below

# Proprietary models examples
gpt-4-1106-preview:
  model: gpt-4-1106-preview
  endpoints: null
  api_type: openai
  parallel: 32
  max_tokens: 4096
  temperature: 0.0

gpt-4-turbo-2024-04-09:
  model: gpt-4-turbo-2024-04-09
  endpoints: null
  api_type: openai
  parallel: 32
  max_tokens: 4096
  temperature: 0.0

gpt-4.1:
  model: gpt-4.1
  endpoints: null
  api_type: openai
  parallel: 64
  max_tokens: 32000
  temperature: 0.0

gpt-4.1-mini:
  model: gpt-4.1-mini
  endpoints: null
  api_type: openai
  parallel: 64
  max_tokens: 32000

# Earlier Gemini entry kept for reference (disabled; the active
# gemini-2.5 entry below uses the vertex api_type instead).
# gemini-2.5:
#   model: gemini-2.5-pro-preview-03-25
#   endpoints: null
#   api_type: gemini
#   parallel: 50

gemini-2.5:
  model: gemini-2.5-pro-exp-03-25
  endpoints: null
  api_type: vertex
  parallel: 16
  project_id: gen-lang-client-0966014990
  regions: us-central1

claude-3-7-sonnet-20250219-thinking-16k:
  model: claude-3-7-sonnet-20250219
  endpoints: null
  max_tokens: 20000
  budget_tokens: 16000
  api_type: anthropic_thinking
  parallel: 32

deepseek-r1:
  # NOTE(review): unlike sibling entries this one has no `model` key —
  # confirm the consumer derives the model name from api_type/entry name.
  endpoints:
    # Placeholder credential — fill in before use. Written as an explicit
    # null (the original left the value empty, which also loads as null).
    - api_key: null
  api_type: deepseek_reasoner
  parallel: 32

claude-3-5-sonnet-20241022:
  model: claude-3-5-sonnet-20241022
  endpoints: null
  max_tokens: 20000
  api_type: anthropic
  parallel: 32

o3-mini-2025-01-31-high:
  model: o3-mini-2025-01-31
  endpoints: null
  reasoning_effort: high
  api_type: openai_thinking
  parallel: 32

gpt-4o-mini-2024-07-18:
  model: gpt-4o-mini
  endpoints: null
  api_type: openai
  parallel: 128
  # NOTE(review): 8196 is likely a typo for 8192 (2**13) — confirm before changing.
  max_tokens: 8196
  temperature: 0.0

# Local inference examples
qwq-32b:
  model: Qwen/QwQ-32B
  endpoints: null
  api_type: sglang
  local_engine: true  # canonical lowercase boolean (was `True`; same value)
  temperature: 0.6
  # NOTE(review): empty value may be an extraction artifact — reasoning models
  # of this family normally use an end-of-thinking tag here; verify.
  end_think_token: ""
  max_tokens: 32000

gemma-3-27b-it:
  model: google/gemma-3-27b-it
  endpoints:
    # NOTE(review): port number is missing after the colon — fill in the
    # local server port (quoted so the URL stays a plain string).
    - api_base: 'http://0.0.0.0:/v1'
      api_key: '-'
  api_type: openai
  parallel: 128
  # NOTE(review): 8196 is likely a typo for 8192 (2**13) — confirm before changing.
  max_tokens: 8196
  temperature: 0.0