mirror of
https://github.com/GoodStartLabs/AI_Diplomacy.git
synced 2026-04-19 12:58:09 +00:00
adding prompts benchmark
This commit is contained in:
parent
3b5f3015c1
commit
4ee7c2f68a
33 changed files with 2700 additions and 5 deletions
|
|
@ -127,6 +127,13 @@ def build_context_prompt(
|
|||
if display_phase is None:
|
||||
display_phase = year_phase
|
||||
|
||||
# Check if max_year is in the template and handle it
|
||||
if "{max_year}" in context_template:
|
||||
# For now, we'll use a default value or extract from game if available
|
||||
# This could be passed as a parameter or extracted from game settings
|
||||
max_year = getattr(game, 'max_year', 1935) # Default to 1935 if not available
|
||||
context_template = context_template.replace("{max_year}", str(max_year))
|
||||
|
||||
context = context_template.format(
|
||||
power_name=power_name,
|
||||
current_phase=display_phase,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,8 @@
|
|||
You are playing as AUSTRIA in the game of Diplomacy.
|
||||
|
||||
Your Goal: Achieve world domination by controlling 18 supply centers.
|
||||
|
||||
Important Gameplay Tips:
|
||||
- Expand aggressively
|
||||
- Ensure all your units have orders assigned
|
||||
- Avoid passive hold moves
|
||||
35
ai_diplomacy/prompts/prompts_benchmark/context_prompt.txt
Normal file
35
ai_diplomacy/prompts/prompts_benchmark/context_prompt.txt
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
Your Power: {power_name}
|
||||
Current Phase: {current_phase}
|
||||
Game Ends After: {max_year}
|
||||
|
||||
# Your Power's Home Centers
|
||||
{home_centers}
|
||||
Note: You can only build units in your home centers if they are empty. If you lose control of a home center, you cannot build units there, so holding them is critical.
|
||||
|
||||
# Player Status
|
||||
Current Goals:
|
||||
{agent_goals}
|
||||
|
||||
# Relationships:
|
||||
{agent_relationships}
|
||||
|
||||
# Order History
|
||||
{order_history}
|
||||
|
||||
# Game Map
|
||||
Unit Locations:
|
||||
{all_unit_locations}
|
||||
|
||||
Supply Centers Held:
|
||||
{all_supply_centers}
|
||||
|
||||
Possible Orders For {current_phase}
|
||||
{possible_orders}
|
||||
End Possible Orders
|
||||
|
||||
# Recent Private Diary Entries (Your inner thoughts and plans):
|
||||
{agent_private_diary}
|
||||
|
||||
Messages This Round
|
||||
{messages_this_round}
|
||||
End Messages
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
NEGOTIATION MESSAGES
|
||||
|
||||
TASK
|
||||
Generate one or more (preferably several) strategic messages to advance your interests.
|
||||
Always prioritize responding to the messages in the "RECENT MESSAGES REQUIRING YOUR ATTENTION" section.
|
||||
Maintain consistent conversation threads (unless you are deliberately choosing to ignore a power).
|
||||
|
||||
RESPONSE FORMAT
|
||||
Return ONLY a single JSON array containing one or more message objects, remembering to properly escape strings:
|
||||
|
||||
Required JSON structure:
|
||||
[
|
||||
{
|
||||
"message_type": "global" or "private",
|
||||
"content": "Your message text"
|
||||
},
|
||||
...
|
||||
]
|
||||
|
||||
For private messages, also include the recipient:
|
||||
[
|
||||
{
|
||||
"message_type": "private",
|
||||
"recipient": "POWER_NAME",
|
||||
"content": "Your message text"
|
||||
},
|
||||
...
|
||||
]
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
DIARY CONSOLIDATION REQUEST
|
||||
Your Power: {power_name}
|
||||
|
||||
GAME CONTEXT
|
||||
You are playing Diplomacy, a strategic board game set in pre-WWI Europe. Seven powers compete for control by conquering supply centers. Victory requires 18 supply centers.
|
||||
|
||||
FULL DIARY HISTORY
|
||||
{full_diary_text}
|
||||
|
||||
TASK
|
||||
Create a concise consolidated summary of the most important parts of this diary history. It will serve as your long-term memory. Do not include anything that is not strategically or diplomatically useful going forward. Aim for 300 words.
|
||||
|
||||
Prioritize the following:
|
||||
1. **Key Historical Diplomatic Events:** Prioritize both *strategically impactful* and *recent* events.
|
||||
2. **Information that has ongoing importance & usefulness**
|
||||
|
||||
RESPONSE FORMAT
|
||||
Return ONLY the consolidated summary text. Do not include JSON, formatting markers, or meta-commentary.
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
You are playing as ENGLAND in the game of Diplomacy.
|
||||
|
||||
Your Goal: Achieve world domination by controlling 18 supply centers.
|
||||
|
||||
Important Gameplay Tips:
|
||||
- Expand aggressively
|
||||
- Ensure all your units have orders assigned
|
||||
- Avoid passive hold moves
|
||||
30
ai_diplomacy/prompts/prompts_benchmark/few_shot_example.txt
Normal file
30
ai_diplomacy/prompts/prompts_benchmark/few_shot_example.txt
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
EXAMPLE GAME STATE
|
||||
Power: FRANCE
|
||||
Phase: S1901M
|
||||
Your Units: ['A PAR','F BRE']
|
||||
Possible Orders:
|
||||
PAR: ['A PAR H','A PAR - BUR','A PAR - GAS']
|
||||
BRE: ['F BRE H','F BRE - MAO']
|
||||
|
||||
PAST PHASE SUMMARIES
|
||||
- Your move A BUD -> SER bounced last time because Turkey also moved A SMY -> SER with support.
|
||||
- Your support F TRI S A BUD -> SER was wasted because F TRI was needed to block Ionian invasion.
|
||||
|
||||
THINKING PROCESS
|
||||
1. Consider enemy units, centers, and likely moves
|
||||
2. Review your units, centers, and strategic position
|
||||
3. Analyze recent conversations and phase summaries
|
||||
4. Evaluate public/private goals and reality of positions
|
||||
5. Choose best strategic moves from possible orders
|
||||
|
||||
Example thought process:
|
||||
- Germany might move to BUR with support - consider bounce or defend
|
||||
- Moving A PAR - BUR is aggressive but strategic
|
||||
- F BRE - MAO secures Atlantic expansion
|
||||
- Avoid contradictory or random supports
|
||||
|
||||
RESPONSE FORMAT
|
||||
PARSABLE OUTPUT:
|
||||
{{
|
||||
"orders": ["A PAR - BUR","F BRE - MAO"]
|
||||
}}
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
You are playing as FRANCE in the game of Diplomacy.
|
||||
|
||||
Your Goal: Achieve world domination by controlling 18 supply centers.
|
||||
|
||||
Important Gameplay Tips:
|
||||
- Expand aggressively
|
||||
- Ensure all your units have orders assigned
|
||||
- Avoid passive hold moves
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
You are playing as GERMANY in the game of Diplomacy.
|
||||
|
||||
Your Goal: Achieve world domination by controlling 18 supply centers.
|
||||
|
||||
Important Gameplay Tips:
|
||||
- Expand aggressively
|
||||
- Ensure all your units have orders assigned
|
||||
- Avoid passive hold moves
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
You are playing as ITALY in the game of Diplomacy.
|
||||
|
||||
Your Goal: Achieve world domination by controlling 18 supply centers.
|
||||
|
||||
Important Gameplay Tips:
|
||||
- Expand aggressively
|
||||
- Ensure all your units have orders assigned
|
||||
- Avoid passive hold moves
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
NEGOTIATION SUMMARY REQUEST
|
||||
Power: {power_name}
|
||||
Phase: {current_phase}
|
||||
|
||||
Game State:
|
||||
{board_state_str}
|
||||
|
||||
Private Diary:
|
||||
{private_diary_summary}
|
||||
|
||||
Messages This Round:
|
||||
{messages_this_round}
|
||||
|
||||
Goals:
|
||||
{agent_goals}
|
||||
|
||||
Relationships:
|
||||
{agent_relationships}
|
||||
|
||||
|
||||
|
||||
TASK
|
||||
Analyze the negotiations, goals, relationships, and game state to:
|
||||
1. Summarize key outcomes and agreements concisely
|
||||
2. Concisely state your specific intents for {current_phase}, including moves you have agreed to in negotiations and whether you intend to fulfil them.
|
||||
3. Update relationships as needed (Enemy, Unfriendly, Neutral, Friendly, Ally)
|
||||
4. Include your latest overarching goals (including any updates)
|
||||
5. Important: You will not see the full negotiation log in the order decision phase, so you must transmit key information about the negotiations to your future self via this summary.
|
||||
|
||||
RESPONSE FORMAT
|
||||
Return ONLY a JSON object with this structure:
|
||||
|
||||
{{
|
||||
"negotiation_summary": "Key outcomes from negotiations",
|
||||
"intent": "Specific intent for upcoming orders this phase",
|
||||
"updated_relationships": {{
|
||||
"POWER_NAME": "Enemy|Unfriendly|Neutral|Friendly|Ally"
|
||||
}},
|
||||
"goals": [
|
||||
"goal 1",
|
||||
"goal 2",
|
||||
...
|
||||
]
|
||||
}}
|
||||
|
||||
Reminder: If you need to quote something, only use single quotes in the actual messages so as not to interfere with the JSON structure.
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
ORDER DIARY ENTRY
|
||||
Power: {power_name}
|
||||
Phase: {current_phase}
|
||||
|
||||
ORDERS ISSUED
|
||||
{orders_list_str}
|
||||
|
||||
CURRENT STATUS
|
||||
Game State:
|
||||
{board_state_str}
|
||||
|
||||
Relationships:
|
||||
{agent_relationships}
|
||||
|
||||
TASK
|
||||
Write a concise diary note summarizing your orders.
|
||||
|
||||
RESPONSE FORMAT
|
||||
Return ONLY a JSON object with this structure:
|
||||
{
|
||||
"order_summary": "Brief summary of orders and strategic intent"
|
||||
}
|
||||
|
||||
Do not include any text outside the JSON.
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
# Primary Objective
|
||||
Control 18 supply centers. Nothing else will do.
|
||||
|
||||
# Critical Rules
|
||||
1. The possible orders section shows your units' allowed adjustment orders
|
||||
2. Dual-coast provinces (STP, SPA, BUL) require coast specification:
|
||||
- Format: 'F [PROVINCE]/[COAST]' where [COAST] = NC (North), SC (South), EC (East), or WC (West)
|
||||
- Example: 'F STP/NC B'
|
||||
- Only fleet builds need coast specification.
|
||||
|
||||
# Adjustment Phase Orders:
|
||||
You have two main order types in the adjustment phase:
|
||||
Build: '[UnitType] [Location] B'
|
||||
e.g. 'A PAR B', 'F LON B'
|
||||
Disband: '[UnitType] [Location] D'
|
||||
e.g. 'A PAR D', 'F LON D'
|
||||
|
||||
Your Task:
|
||||
1. Reason
|
||||
- comprehensive reasoning about your adjustment decisions
|
||||
2. Output Moves in JSON
|
||||
- return all build/disband orders needed
|
||||
|
||||
Respond with this exact format:
|
||||
|
||||
Reasoning:
|
||||
(Your reasoning goes here)
|
||||
|
||||
PARSABLE OUTPUT:
|
||||
{{
|
||||
"orders": ["order1", "order2", ...]
|
||||
}}
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
# Primary Objective
|
||||
Control 18 supply centers. Nothing else will do.
|
||||
|
||||
# Critical Rules
|
||||
1. The possible orders section shows your units' allowed moves & supports of your own units.
|
||||
2. The possible orders section does *not* list possible supports for other powers' units; you can work these out yourself by looking at the units that are adjacent to your own.
|
||||
3. If your goal is to *take* a province, give exactly one move order on that province and any additional support from other units must be properly formatted support orders.
|
||||
4. Dual-coast provinces (STP, SPA, BUL) require coast specification:
|
||||
- Format: 'F [PROVINCE]/[COAST]' where [COAST] = NC (North), SC (South), EC (East), or WC (West)
|
||||
- Example: 'F SPA/SC - MAO'
|
||||
- Only fleets need coast specification.
|
||||
5. Aim to issue an order for all of your units. Holds tend to be wasted orders.
|
||||
|
||||
Your Task:
|
||||
1. Reason
|
||||
- comprehensive reasoning about your move decisions
|
||||
2. Output Moves in JSON
|
||||
- aim to return an order for each of your units.
|
||||
|
||||
Respond with this exact format:
|
||||
|
||||
Reasoning:
|
||||
(Your reasoning goes here)
|
||||
|
||||
PARSABLE OUTPUT:
|
||||
{{
|
||||
"orders": ["order1", "order2", ...]
|
||||
}}
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
# Primary Objective
|
||||
Control 18 supply centers. Nothing else will do.
|
||||
|
||||
# Critical Rules
|
||||
1. The possible orders section shows where your dislodged units can retreat.
|
||||
2. Units cannot retreat to:
|
||||
- The province the attacking unit came from
|
||||
- A province occupied after this turn's moves
|
||||
- A province where a standoff occurred
|
||||
3. If no valid retreat exists, the unit must disband.
|
||||
4. Dual-coast provinces (STP, SPA, BUL) require coast specification:
|
||||
- Format: 'F [PROVINCE]/[COAST]' where [COAST] = NC (North), SC (South), EC (East), or WC (West)
|
||||
- Example: 'F SPA/SC - MAO'
|
||||
- Only fleet retreat orders need coast specification.
|
||||
|
||||
Your Task:
|
||||
1. Reason
|
||||
- comprehensive reasoning about your retreat decisions
|
||||
2. Output Moves in JSON
|
||||
- provide a retreat or disband order for each dislodged unit
|
||||
|
||||
Respond with this exact format:
|
||||
|
||||
Reasoning:
|
||||
(Your reasoning goes here)
|
||||
|
||||
PARSABLE OUTPUT:
|
||||
{{
|
||||
"orders": ["order1", "order2", ...]
|
||||
}}
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
PHASE RESULT ANALYSIS
|
||||
Your Power: {power_name}
|
||||
Phase: {current_phase}
|
||||
|
||||
RECENT DIARY ENTRIES
|
||||
{formatted_diary}
|
||||
|
||||
BOARD STATE
|
||||
{board_state}
|
||||
|
||||
PHASE SUMMARY
|
||||
{phase_summary}
|
||||
|
||||
ALL POWERS' ORDERS THIS PHASE
|
||||
{all_orders_formatted}
|
||||
|
||||
YOUR NEGOTIATIONS THIS PHASE
|
||||
{your_negotiations}
|
||||
|
||||
YOUR RELATIONSHIPS BEFORE THIS PHASE
|
||||
{pre_phase_relationships}
|
||||
|
||||
YOUR GOALS
|
||||
{agent_goals}
|
||||
|
||||
TASK
|
||||
Analyze what actually happened this phase compared to negotiations and expectations.
|
||||
|
||||
Consider:
|
||||
1. BETRAYALS: Who broke their promises? Did you break any promises?
|
||||
2. COLLABORATIONS: Which agreements were successfully executed?
|
||||
3. SURPRISES: What unexpected moves occurred?
|
||||
4. IMPACT: How did these events affect your strategic position?
|
||||
|
||||
Write a concise diary entry (100-150 words) of the most important things you would like to remember, e.g.:
|
||||
- Key betrayals or successful collaborations
|
||||
- Assess impact on your position
|
||||
- Update your understanding of other powers' trustworthiness
|
||||
- Strategic lessons learned
|
||||
- Moves that failed, and ideas on how to avoid the error in the future
|
||||
|
||||
Focus on concrete events and their implications for your future strategy.
|
||||
|
||||
RESPONSE FORMAT
|
||||
Return ONLY a diary entry text. Do not include JSON or formatting markers.
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
STRATEGIC PLANNING
|
||||
|
||||
PRIMARY OBJECTIVE
|
||||
Capture 18 supply centers to win. Be aggressive and expansionist.
|
||||
- Prioritize capturing supply centers
|
||||
- Seize opportunities aggressively
|
||||
- Take calculated risks for significant gains
|
||||
- Find alternative paths if blocked
|
||||
- Avoid purely defensive postures
|
||||
|
||||
KEY CONSIDERATIONS
|
||||
1. Target Supply Centers
|
||||
- Which centers can you capture this phase?
|
||||
- Which centers should you target in future phases?
|
||||
|
||||
2. Success Requirements
|
||||
- What must happen for your moves to succeed?
|
||||
- How to prevent bounces?
|
||||
|
||||
3. Diplomatic Strategy
|
||||
- Which negotiations could help your moves succeed?
|
||||
- What deals or threats might be effective?
|
||||
- Consider alliances, deception, and concessions
|
||||
|
||||
4. Defense Assessment
|
||||
- Which of your centers might others target?
|
||||
- How can you protect vulnerable positions?
|
||||
|
||||
5. Diplomatic Protection
|
||||
- What negotiations could deter attacks?
|
||||
- How to mislead potential attackers?
|
||||
|
||||
TASK
|
||||
Write a detailed one-paragraph directive covering:
|
||||
- Supply centers to capture
|
||||
- How to capture them (orders, allies, deals)
|
||||
- Defensive considerations
|
||||
- Diplomatic approach (including potential deception)
|
||||
|
||||
|
||||
This directive will guide your future negotiations and orders.
|
||||
Be specific, strategic, and wary of deception from others.
|
||||
|
||||
RESPOND WITH YOUR DIRECTIVE BELOW
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
You are playing as RUSSIA in the game of Diplomacy.
|
||||
|
||||
Your Goal: Achieve world domination by controlling 18 supply centers.
|
||||
|
||||
Important Gameplay Tips:
|
||||
- Expand aggressively
|
||||
- Ensure all your units have orders assigned
|
||||
- Avoid passive hold moves
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
You are analyzing the results of a phase in Diplomacy. Your power is {power_name}.
|
||||
|
||||
GAME STATE
|
||||
Year: {current_year}
|
||||
Phase: {current_phase}
|
||||
Board State:
|
||||
{board_state_str}
|
||||
|
||||
TASK
|
||||
Analyze the phase summary and game state to update your relationships and goals.
|
||||
|
||||
IMPORTANT RULES
|
||||
1. Update relationships for ALL powers in {other_powers}
|
||||
2. Use ONLY these relationship values: Enemy, Unfriendly, Neutral, Friendly, Ally
|
||||
3. Make goals specific and actionable
|
||||
4. Return ONLY valid JSON - no text before or after
|
||||
|
||||
Response Structure:
|
||||
{{
|
||||
"reasoning": "Brief reasoning about the update",
|
||||
"relationships": {{
|
||||
"POWER_NAME": "Relationship Status",
|
||||
...
|
||||
}},
|
||||
"goals": [
|
||||
"Specific goal 1",
|
||||
"Specific goal 2",
|
||||
...
|
||||
]
|
||||
}}
|
||||
0
ai_diplomacy/prompts/prompts_benchmark/system_prompt.txt
Normal file
0
ai_diplomacy/prompts/prompts_benchmark/system_prompt.txt
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
You are playing as TURKEY in the game of Diplomacy.
|
||||
|
||||
Your Goal: Achieve world domination by controlling 18 supply centers.
|
||||
|
||||
Important Gameplay Tips:
|
||||
- Expand aggressively
|
||||
- Ensure all your units have orders assigned
|
||||
- Avoid passive hold moves
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
NEGOTIATION MESSAGES
|
||||
|
||||
TASK
|
||||
Generate one or more (preferably several) strategic messages to advance your interests.
|
||||
Always prioritize responding to the messages in the "RECENT MESSAGES REQUIRING YOUR ATTENTION" section.
|
||||
Maintain consistent conversation threads (unless you are deliberately choosing to ignore a power).
|
||||
|
||||
Please respond in two parts:
|
||||
|
||||
1. REASONING: First, explain your diplomatic strategy for this round. Who are you trying to influence and why? Which messages require responses? What deals are you proposing or accepting? Who might you be deliberately ignoring and why?
|
||||
|
||||
2. MESSAGES: Then, list each message you want to send. For each message, clearly indicate:
|
||||
- Whether it's a global message (visible to all) or private (to a specific power)
|
||||
- If private, who the recipient is
|
||||
- The content of your message
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
You are the agent for {power_name} in a game of Diplomacy at the very start (Spring 1901). Analyze the initial board position and suggest 2-3 strategic high-level goals for the early game. Consider your power's strengths, weaknesses, and neighbors. Also, provide an initial assessment of relationships with other powers. IMPORTANT: For each relationship, you MUST use exactly one of the following labels: {allowed_labels_str}.
|
||||
|
||||
Please respond in two parts:
|
||||
|
||||
1. REASONING: First, explain your strategic analysis of the starting position.
|
||||
|
||||
2. STRATEGY: Then, provide your 2-3 strategic high-level goals and your initial assessment of relationships with other powers.
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
NEGOTIATION SUMMARY REQUEST
|
||||
Power: {power_name}
|
||||
Phase: {current_phase}
|
||||
|
||||
MESSAGES THIS ROUND
|
||||
{messages_this_round}
|
||||
{ignored_messages_context}
|
||||
|
||||
CURRENT STATUS
|
||||
Goals:
|
||||
{agent_goals}
|
||||
|
||||
Relationships:
|
||||
{agent_relationships}
|
||||
|
||||
Game State:
|
||||
{board_state_str}
|
||||
|
||||
TASK
|
||||
Analyze the negotiations, goals, relationships, and game state to:
|
||||
1. Summarize key outcomes and agreements
|
||||
2. State your specific intents for {current_phase}, including moves you have agreed to in negotiations and whether you intend to fulfil them.
|
||||
3. Update relationships as needed (Enemy, Unfriendly, Neutral, Friendly, Ally)
|
||||
4. Important: You will not see the full negotiation log in the order decision phase, so you must transmit key information about the negotiations to your future self via this summary.
|
||||
|
||||
When powers ignore your messages, consider:
|
||||
- They may be intentionally avoiding commitment
|
||||
- They could be prioritizing other relationships
|
||||
- Your approach may need adjustment (more direct questions, different incentives)
|
||||
- Their silence might indicate hostility or indifference
|
||||
|
||||
Please respond in two parts:
|
||||
|
||||
1. REASONING: First, explain your analysis of the negotiations. What did each power communicate or fail to communicate? What do their messages (or silence) reveal about their intentions? How does this affect your strategic position?
|
||||
|
||||
2. NEGOTIATION SUMMARY: Then provide:
|
||||
- A summary of key outcomes from the negotiations
|
||||
- Your strategic intent for upcoming orders (be specific about agreed moves and whether you'll honor them)
|
||||
- Your current assessment of relationships with all powers (reflecting any changes from negotiations)
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
ORDER DIARY ENTRY
|
||||
Power: {power_name}
|
||||
Phase: {current_phase}
|
||||
|
||||
ORDERS ISSUED
|
||||
{orders_list_str}
|
||||
|
||||
CURRENT STATUS
|
||||
Game State:
|
||||
{board_state_str}
|
||||
|
||||
Goals:
|
||||
{agent_goals}
|
||||
|
||||
Relationships:
|
||||
{agent_relationships}
|
||||
|
||||
TASK
|
||||
Write a diary entry analyzing your orders for this turn.
|
||||
|
||||
Please respond in two parts:
|
||||
|
||||
1. REASONING: First, explain your strategic analysis of the current situation. What threats and opportunities do you see? How do your relationships with other powers influence your decisions? What are you trying to achieve this turn?
|
||||
|
||||
2. ORDER SUMMARY: Then, provide a clear summary of the orders you submitted and explain why you chose these specific moves. How do they advance your strategic goals? What risks are you taking?
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
# Primary Objective
|
||||
Control 18 supply centers. Nothing else will do.
|
||||
|
||||
# Critical Rules
|
||||
1. The possible orders section shows your units' allowed adjustment orders
|
||||
2. Dual-coast provinces (STP, SPA, BUL) require coast specification:
|
||||
- Format: 'F [PROVINCE]/[COAST]' where [COAST] = NC (North), SC (South), EC (East), or WC (West)
|
||||
- Example: 'F STP/NC B'
|
||||
- Only fleet builds need coast specification.
|
||||
|
||||
# Adjustment Phase Orders:
|
||||
You have two main order types in the adjustment phase:
|
||||
Build: '[UnitType] [Location] B'
|
||||
e.g. 'A PAR B', 'F LON B'
|
||||
Disband: '[UnitType] [Location] D'
|
||||
e.g. 'A PAR D', 'F LON D'
|
||||
|
||||
Your Task:
|
||||
Analyze the adjustment situation and decide which units to build or disband.
|
||||
|
||||
Please respond in two parts:
|
||||
|
||||
1. REASONING: First, explain your adjustment decisions. What is your unit count vs supply center count? Where should you build for maximum strategic impact? Which units should be disbanded if necessary?
|
||||
|
||||
2. ADJUSTMENT ORDERS: Then, provide all build/disband orders needed.
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
# Primary Objective
|
||||
Control 18 supply centers. Nothing else will do.
|
||||
|
||||
# Critical Rules
|
||||
1. The possible orders section shows your units' allowed moves & supports of your own units.
|
||||
2. The possible orders section does *not* list possible supports for other powers' units; you can work these out yourself by looking at the units that are adjacent to your own.
|
||||
3. If your goal is to *take* a province, give exactly one move order on that province and any additional support from other units must be properly formatted support orders.
|
||||
4. Dual-coast provinces (STP, SPA, BUL) require coast specification:
|
||||
- Format: 'F [PROVINCE]/[COAST]' where [COAST] = NC (North), SC (South), EC (East), or WC (West)
|
||||
- Example: 'F SPA/SC - MAO'
|
||||
- Only fleets need coast specification.
|
||||
|
||||
Your Task:
|
||||
Please respond in two parts:
|
||||
|
||||
1. REASONING: First, provide comprehensive reasoning about your move decisions. What are your immediate objectives? Which supply centers are you targeting? How will you deal with threats? What coordinated moves are you planning? Consider all your units and their best uses.
|
||||
|
||||
2. ORDERS: Then, list each order you want to submit, one per line. Be precise with unit types (A/F) and location codes. Aim to return an order for each of your units.
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
# Primary Objective
|
||||
Control 18 supply centers. Nothing else will do.
|
||||
|
||||
# Critical Rules
|
||||
1. The possible orders section shows where your dislodged units can retreat.
|
||||
2. Units cannot retreat to:
|
||||
- The province the attacking unit came from
|
||||
- A province occupied after this turn's moves
|
||||
- A province where a standoff occurred
|
||||
3. If no valid retreat exists, the unit must disband.
|
||||
4. Dual-coast provinces (STP, SPA, BUL) require coast specification:
|
||||
- Format: 'F [PROVINCE]/[COAST]' where [COAST] = NC (North), SC (South), EC (East), or WC (West)
|
||||
- Example: 'F SPA/SC - MAO'
|
||||
- Only fleet retreat orders need coast specification.
|
||||
|
||||
Your Task:
|
||||
Analyze the retreat situation and decide on the best retreat or disband orders for your dislodged units.
|
||||
|
||||
Please respond in two parts:
|
||||
|
||||
1. REASONING: First, explain your retreat decisions. Which units are dislodged? Where can they retreat? Is it better to retreat or disband? Consider the strategic implications of each choice.
|
||||
|
||||
2. RETREAT ORDERS: Then, provide a retreat or disband order for each dislodged unit.
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
You are analyzing the results of a phase in Diplomacy for {power_name}.
|
||||
|
||||
GAME STATE
|
||||
Year: {current_year}
|
||||
Phase: {current_phase}
|
||||
Board State:
|
||||
{board_state_str}
|
||||
|
||||
PHASE SUMMARY ({current_phase}):
|
||||
{phase_summary}
|
||||
|
||||
CURRENT STATUS
|
||||
|
||||
Relationships with other powers ({other_powers}):
|
||||
{current_relationships}
|
||||
|
||||
TASK
|
||||
Analyze the phase summary and game state to update your relationships and goals.
|
||||
|
||||
IMPORTANT RULES
|
||||
1. Update relationships for ALL powers in {other_powers}
|
||||
2. Use ONLY these relationship values: Enemy, Unfriendly, Neutral, Friendly, Ally
|
||||
3. Make goals specific and actionable
|
||||
4. Base analysis on actual events, not assumptions
|
||||
|
||||
Please respond in two parts:
|
||||
|
||||
1. REASONING: First, explain your analysis of what happened this phase. Which powers acted as expected? Who surprised you? What new threats or opportunities have emerged? How do the results affect your strategic position?
|
||||
|
||||
2. UPDATES: Then provide:
|
||||
- Your updated assessment of relationships with ALL other powers
|
||||
- Your updated goals (2-4 specific, actionable goals based on the current situation)
|
||||
|
|
@ -15,6 +15,7 @@ import requests
|
|||
from pathlib import Path
|
||||
from config import config
|
||||
from models import POWERS_ORDER
|
||||
from datetime import datetime
|
||||
|
||||
# Avoid circular import for type hinting
|
||||
if TYPE_CHECKING:
|
||||
|
|
@ -369,8 +370,8 @@ def log_llm_response(
|
|||
file_exists = os.path.isfile(log_file_path) and os.path.getsize(log_file_path) > 0
|
||||
|
||||
with open(log_file_path, "a", newline="", encoding="utf-8") as csvfile:
|
||||
# Added "raw_input" to fieldnames
|
||||
fieldnames = ["model", "power", "phase", "response_type", "raw_input", "raw_response", "success"]
|
||||
# Added "raw_input" and "timestamp" to fieldnames
|
||||
fieldnames = ["timestamp", "model", "power", "phase", "response_type", "raw_input", "raw_response", "success"]
|
||||
writer = csv.DictWriter(
|
||||
csvfile,
|
||||
fieldnames=fieldnames,
|
||||
|
|
@ -383,6 +384,7 @@ def log_llm_response(
|
|||
|
||||
writer.writerow(
|
||||
{
|
||||
"timestamp": datetime.now().isoformat(), # Add current timestamp in ISO format
|
||||
"model": model_name,
|
||||
"power": power_name if power_name else "game", # Use 'game' if no specific power
|
||||
"phase": phase,
|
||||
|
|
|
|||
2049
benchmark_results.ipynb
Normal file
2049
benchmark_results.ipynb
Normal file
File diff suppressed because one or more lines are too long
13
lm_game.py
13
lm_game.py
|
|
@ -369,7 +369,9 @@ async def main():
|
|||
if neg_diary_tasks:
|
||||
await asyncio.gather(*neg_diary_tasks, return_exceptions=True)
|
||||
|
||||
# Diary Consolidation
|
||||
# --- 4c. Parallel Order Generation and Diary Consolidation ---
|
||||
# Start diary consolidation in parallel with order generation
|
||||
consolidation_future = None
|
||||
if current_short_phase.startswith("S") and current_short_phase.endswith("M"):
|
||||
consolidation_tasks = [
|
||||
run_diary_consolidation(agent, game, llm_log_file_path,
|
||||
|
|
@ -378,9 +380,10 @@ async def main():
|
|||
if not game.powers[agent.power_name].is_eliminated()
|
||||
]
|
||||
if consolidation_tasks:
|
||||
await asyncio.gather(*consolidation_tasks, return_exceptions=True)
|
||||
# Start consolidation tasks but don't await yet
|
||||
consolidation_future = asyncio.gather(*consolidation_tasks, return_exceptions=True)
|
||||
|
||||
# --- 4c. Order Generation ---
|
||||
# Order Generation (proceeds with current diary state)
|
||||
logger.info("Getting orders from agents...")
|
||||
board_state = game.get_state()
|
||||
order_tasks = []
|
||||
|
|
@ -403,6 +406,10 @@ async def main():
|
|||
|
||||
order_results = await asyncio.gather(*order_tasks, return_exceptions=True)
|
||||
|
||||
# Ensure consolidation completes before proceeding to diary entries
|
||||
if consolidation_future:
|
||||
await consolidation_future
|
||||
|
||||
active_powers = [p for p, a in agents.items() if not game.powers[p].is_eliminated()]
|
||||
order_power_names = [p for p in active_powers if gather_possible_orders(game, p)]
|
||||
submitted_orders_this_phase = defaultdict(list)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue