mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-25 17:10:42 +00:00
Enhance DynastAI scenario generation with structured point system and history tracking
- Updated point system guidelines to restrict values for Piety, Stability, Power, and Wealth to a range of -20 to 20.
- Introduced a category field for scenario classification.
- Added examples for scenario generation to improve clarity.
- Implemented choice history tracking to maintain a record of player decisions and their effects on the kingdom's state.
- Enhanced prompt formatting to include current kingdom state and recent choices for better context in scenario generation.
This commit is contained in:
parent
e0dabe1225
commit
fba2a42bb7
1 changed file with 188 additions and 45 deletions
|
|
@ -30,20 +30,79 @@ You are playing a game called DynastAI where you generate scenarios for a kingdo
|
|||
Each scenario should include a character presenting a dilemma to the ruler, with two choices that affect
|
||||
the four key resources of the kingdom: Piety, Stability, Power, and Wealth.
|
||||
|
||||
**Point System Guidelines:**
|
||||
- The point values for Piety, Stability, Power, and Wealth for each choice should be integers ranging from -20 to 20.
|
||||
- These values should be logically consistent with the scenario and the choice described. A choice that is clearly beneficial should have a net positive sum of points, while a detrimental choice should have a net negative sum.
|
||||
- Strive for a variety of point distributions; not all resources need to be affected by every choice.
|
||||
|
||||
Your response must be a valid JSON object with the following structure:
|
||||
{
|
||||
"Character": "Name/Title of the character",
|
||||
"Prompt": "The scenario description",
|
||||
"Left_Choice": "The first choice option",
|
||||
"Left_Piety": integer value between -30 and 30,
|
||||
"Left_Stability": integer value between -30 and 30,
|
||||
"Left_Power": integer value between -30 and 30,
|
||||
"Left_Wealth": integer value between -30 and 30,
|
||||
"Left_Piety": integer value between -20 and 20,
|
||||
"Left_Stability": integer value between -20 and 20,
|
||||
"Left_Power": integer value between -20 and 20,
|
||||
"Left_Wealth": integer value between -20 and 20,
|
||||
"Right_Choice": "The second choice option",
|
||||
"Right_Piety": integer value between -30 and 30,
|
||||
"Right_Stability": integer value between -30 and 30,
|
||||
"Right_Power": integer value between -30 and 30,
|
||||
"Right_Wealth": integer value between -30 and 30
|
||||
"Right_Piety": integer value between -20 and 20,
|
||||
"Right_Stability": integer value between -20 and 20,
|
||||
"Right_Power": integer value between -20 and 20,
|
||||
"Right_Wealth": integer value between -20 and 20,
|
||||
"category": "piety/stability/power/wealth"
|
||||
}
|
||||
|
||||
Here are some examples:
|
||||
|
||||
Example 1:
|
||||
{
|
||||
"Character": "Diplomat",
|
||||
"Prompt": "With a sly smile, the diplomat gestures broadly: \"Sire, the lords quarrel like children. Shall we mediate disputes between lords?\"",
|
||||
"Left_Choice": "We cannot risk the kingdom's future; dismiss them with a royal wave.",
|
||||
"Left_Piety": 10,
|
||||
"Left_Stability": -10,
|
||||
"Left_Power": 0,
|
||||
"Left_Wealth": 0,
|
||||
"Right_Choice": "Make it so; our enemies shall kneel in terror!",
|
||||
"Right_Piety": -10,
|
||||
"Right_Stability": 10,
|
||||
"Right_Power": 0,
|
||||
"Right_Wealth": 0,
|
||||
"category": "stability"
|
||||
}
|
||||
|
||||
Example 2:
|
||||
{
|
||||
"Character": "Merchant",
|
||||
"Prompt": "The merchant nervously fidgets with coins: \"My king, the markets groan under heavy tariffs. Shall we reduce tariffs?\"",
|
||||
"Left_Choice": "Absurd! Unthinkable; it's madness that courts disaster.",
|
||||
"Left_Piety": 0,
|
||||
"Left_Stability": -15,
|
||||
"Left_Power": 0,
|
||||
"Left_Wealth": 10,
|
||||
"Right_Choice": "Brilliant! Most ingenious; begin before the sun sets!",
|
||||
"Right_Piety": 0,
|
||||
"Right_Stability": 15,
|
||||
"Right_Power": 0,
|
||||
"Right_Wealth": -10,
|
||||
"category": "wealth"
|
||||
}
|
||||
|
||||
Example 3:
|
||||
{
|
||||
"Character": "Farmer",
|
||||
"Prompt": "Mud-stained and weary, the farmer removes his cap: \"Your Grace, our villages yearn for markets. Shall we hold local markets?\"",
|
||||
"Left_Choice": "Silence! Such talk borders on treason; it whispers of rebellion and ruin.",
|
||||
"Left_Piety": 0,
|
||||
"Left_Stability": -15,
|
||||
"Left_Power": 0,
|
||||
"Left_Wealth": 10,
|
||||
"Right_Choice": "Indeed! We shall usher wealth and fortune to the land!",
|
||||
"Right_Piety": 0,
|
||||
"Right_Stability": 15,
|
||||
"Right_Power": 0,
|
||||
"Right_Wealth": -10,
|
||||
"category": "stability"
|
||||
}
|
||||
|
||||
Be creative and make each scenario interesting!"""
|
||||
|
|
@ -51,7 +110,8 @@ Be creative and make each scenario interesting!"""
|
|||
|
||||
class _DynastAIRowRequired(TypedDict):
    """Keys that every DynastAI work item must carry."""

    scenario_prompt: str


class DynastAIRow(_DynastAIRowRequired, total=False):
    """One unit of work handed to the rollout code.

    ``scenario_prompt`` is always present. The remaining keys are optional:
    TypedDict fields cannot take default values (PEP 589), so "may be absent"
    is expressed with ``total=False`` instead of ``= None`` assignments.
    """

    # Source card the prompt was derived from, when there is one.
    card: Optional[Dict]
    # Resource levels (Piety/Stability/Power/Wealth) used to build the prompt.
    kingdom_current_state: Optional[Dict]
    # Earlier scenarios/decisions that were included in the prompt.
    choice_history: Optional[List]
|
||||
|
||||
|
||||
class DynastAIEnv(BaseEnv):
|
||||
|
|
@ -129,42 +189,58 @@ class DynastAIEnv(BaseEnv):
|
|||
|
||||
self.train = cards[test_size:]
|
||||
self.test = cards[:test_size]
|
||||
|
||||
# Keep scenario prompts for generating new scenarios
|
||||
self.scenario_prompts = [
|
||||
"Create a dilemma involving the Church and Treasury",
|
||||
"Create a dilemma involving the Military and People",
|
||||
"Create a scenario where a foreign diplomat visits",
|
||||
"Create a scenario about a natural disaster",
|
||||
"Create a scenario about a rebellious noble",
|
||||
"Create a scenario about a religious conflict",
|
||||
"Create a scenario about a military campaign",
|
||||
"Create a scenario about a royal marriage proposal",
|
||||
"Create a scenario about a trade agreement",
|
||||
"Create a scenario about a mysterious artifact",
|
||||
"Create a scenario about peasant unrest",
|
||||
"Create a scenario about a spy in the court",
|
||||
"Create a scenario about a disputed succession",
|
||||
"Create a scenario about a diplomatic incident",
|
||||
"Create a scenario about a technological innovation",
|
||||
]
|
||||
self.iter = 0
|
||||
|
||||
# Initialize default kingdom state
|
||||
self.current_kingdom_state = {
|
||||
"Piety": 50,
|
||||
"Stability": 50,
|
||||
"Power": 50,
|
||||
"Wealth": 50
|
||||
}
|
||||
self.choice_history = []
|
||||
|
||||
def save_checkpoint(self, step, data=None):
    """Add this environment's resumable state to ``data`` and delegate upward.

    Args:
        step: Checkpoint step, forwarded unchanged to the parent class.
        data: Optional dict to extend in place; a new dict is used when omitted.
    """
    payload = {} if data is None else data
    payload["iter"] = self.iter
    payload["current_kingdom_state"] = self.current_kingdom_state
    payload["choice_history"] = self.choice_history
    super().save_checkpoint(step, payload)
|
||||
|
||||
async def evaluate(self, *args, **kwargs):
    """Run one scored rollout per held-out card and record the mean score.

    Each test card contributes a prompt built from the kingdom state and
    choice history found under its ``"input"`` entry, falling back to the
    environment's current state and an empty history. The mean of the
    returned scores is appended to ``self.eval_metrics`` as
    ``"eval/percent_correct"``.

    Nothing is recorded when the test split is empty, which previously
    raised ``ZeroDivisionError``.
    """
    eval_tasks = []
    for card in self.test:
        input_data = card.get("input", {})
        kingdom_state = input_data.get(
            "kingdom_current_state", self.current_kingdom_state
        )
        choice_history = input_data.get("choice_history", [])
        prompt = self.format_prompt(kingdom_state, choice_history)
        eval_tasks.append(self.rollout_and_score_eval(prompt))

    if not eval_tasks:
        # No held-out cards: there is no mean to compute.
        return

    scores = await tqdm_asyncio.gather(*eval_tasks)
    self.eval_metrics.append(
        ("eval/percent_correct", sum(scores) / len(scores))
    )
|
||||
|
||||
def format_prompt(self, kingdom_state, choice_history):
    """Build the user prompt describing the kingdom and its recent decisions.

    Args:
        kingdom_state: Mapping of resource name to level. Missing resources
            are shown as 50; ``None`` is treated as an empty mapping.
        choice_history: Sequence of past choice dicts (``Character``,
            ``Prompt``, ``choice_made``, ``effects``). Only the last three
            are included; ``None`` or empty omits the section.

    Returns:
        The prompt text.
    """
    resources = ("Piety", "Stability", "Power", "Wealth")
    kingdom_state = kingdom_state or {}

    state_line = ", ".join(
        f"{name}: {kingdom_state.get(name, 50)}" for name in resources
    )
    parts = [
        "Generate a new scenario for the kingdom with the following current state:\n",
        f"{state_line}\n\n",
    ]

    if choice_history:
        parts.append("Previous choices made:\n")
        # Numbering restarts at 1 for the shown window, not the full history.
        for number, choice in enumerate(choice_history[-3:], start=1):
            # An explicit None for "effects" is treated like a missing entry.
            effects = choice.get("effects") or {}
            effects_line = ", ".join(
                f"{name} {effects.get(name, 0)}" for name in resources
            )
            character = choice.get("Character", "Unknown")
            scenario = choice.get("Prompt", "Unknown")
            decision = choice.get("choice_made", "Unknown")
            parts.append(f"{number}. {character} presented: \"{scenario}\"\n")
            parts.append(f" Decision: {decision}\n")
            parts.append(f" Effects: {effects_line}\n\n")

    parts.append(
        "Based on this context, generate a new challenging scenario for the ruler."
    )
    return "".join(parts)
|
||||
|
||||
async def rollout_and_score_eval(self, scenario_prompt: str) -> number:
|
||||
completion = await self.server.chat_completion(
|
||||
messages=[
|
||||
|
|
@ -201,7 +277,8 @@ class DynastAIEnv(BaseEnv):
|
|||
required_fields = [
|
||||
"Character", "Prompt",
|
||||
"Left_Choice", "Left_Piety", "Left_Stability", "Left_Power", "Left_Wealth",
|
||||
"Right_Choice", "Right_Piety", "Right_Stability", "Right_Power", "Right_Wealth"
|
||||
"Right_Choice", "Right_Piety", "Right_Stability", "Right_Power", "Right_Wealth",
|
||||
"category"
|
||||
]
|
||||
|
||||
if not all(field in data for field in required_fields):
|
||||
|
|
@ -216,9 +293,13 @@ class DynastAIEnv(BaseEnv):
|
|||
for field in numeric_fields:
|
||||
if not isinstance(data[field], int):
|
||||
return 0
|
||||
if data[field] < -30 or data[field] > 30:
|
||||
if data[field] < -20 or data[field] > 20:
|
||||
return 0
|
||||
|
||||
# Check category field
|
||||
if data["category"] not in ["piety", "stability", "power", "wealth"]:
|
||||
return 0
|
||||
|
||||
# If we made it here, the JSON is valid
|
||||
return 1
|
||||
|
||||
|
|
@ -236,8 +317,8 @@ class DynastAIEnv(BaseEnv):
|
|||
max_tokens=self.config.max_token_length,
|
||||
)
|
||||
|
||||
to_score = list()
|
||||
to_backlog = list()
|
||||
to_score = []
|
||||
to_backlog = []
|
||||
|
||||
for i, chat_completion in enumerate(chat_completions.choices):
|
||||
messages = (
|
||||
|
|
@ -251,6 +332,39 @@ class DynastAIEnv(BaseEnv):
|
|||
})
|
||||
|
||||
to_postprocess = await self.score(to_score)
|
||||
|
||||
# Update choice history with the highest scoring scenario
|
||||
if to_postprocess and to_postprocess["scores"]:
|
||||
best_idx = to_postprocess["scores"].index(max(to_postprocess["scores"]))
|
||||
best_content = to_score[best_idx]["messages"][-1]["content"]
|
||||
|
||||
try:
|
||||
# Extract JSON from content
|
||||
if "</think>" in best_content:
|
||||
best_content = best_content.split("</think>")[-1].strip()
|
||||
json_match = re.search(r'\{.*\}', best_content, re.DOTALL)
|
||||
if json_match:
|
||||
json_str = json_match.group(0)
|
||||
data = json.loads(json_str)
|
||||
|
||||
# Store the generated scenario in choice history
|
||||
self.choice_history.append({
|
||||
"Character": data.get("Character", "Unknown"),
|
||||
"Prompt": data.get("Prompt", "Unknown"),
|
||||
"choice_made": "Unknown", # Will be set when player makes a choice
|
||||
"effects": {
|
||||
"Piety": 0,
|
||||
"Stability": 0,
|
||||
"Power": 0,
|
||||
"Wealth": 0
|
||||
},
|
||||
"category": data.get("category", "unknown"),
|
||||
# Store the full scenario data for later use
|
||||
"scenario_data": data
|
||||
})
|
||||
except Exception as e:
|
||||
print(f"Error processing scenario: {e}")
|
||||
|
||||
return to_postprocess, to_backlog
|
||||
|
||||
async def score(
|
||||
|
|
@ -293,19 +407,48 @@ class DynastAIEnv(BaseEnv):
|
|||
return scores
|
||||
|
||||
async def get_next_item(self) -> "DynastAIRow":
    """Produce the next generation task.

    Increments ``self.iter``. When training cards exist, with probability
    0.3 the context (kingdom state and choice history) is read from a random
    card's ``"input"`` entry; otherwise the environment's own running state
    is used. The prompt is built with ``format_prompt``.

    Returns:
        A row with ``scenario_prompt``, ``kingdom_current_state`` and
        ``choice_history``. The latter two are shallow copies, so later
        changes to the environment's state or history do not alter a row
        that has already been handed out.
    """
    self.iter += 1

    if self.train and random.random() < 0.3:
        card = random.choice(self.train)
        input_data = card.get("input", {})
        kingdom_state = input_data.get(
            "kingdom_current_state", self.current_kingdom_state
        )
        choice_history = input_data.get("choice_history", [])
    else:
        kingdom_state = self.current_kingdom_state
        choice_history = self.choice_history

    # Snapshot: self.choice_history is appended to after each rollout and
    # self.current_kingdom_state is updated in place.
    kingdom_state = dict(kingdom_state or {})
    choice_history = list(choice_history or [])

    return {
        "scenario_prompt": self.format_prompt(kingdom_state, choice_history),
        "kingdom_current_state": kingdom_state,
        "choice_history": choice_history,
    }
|
||||
|
||||
# Helper method to update kingdom state based on a choice
|
||||
def update_kingdom_state(self, choice, is_left_choice=True):
    """Apply the chosen side of a scenario to the kingdom and log it.

    Args:
        choice: Scenario dict with ``Left_*`` / ``Right_*`` keys.
        is_left_choice: ``True`` to apply the ``Left_*`` values, ``False``
            for ``Right_*``.

    Each of Piety, Stability, Power and Wealth is shifted by the chosen
    side's value (0 when the key is absent) and clamped to 0..100. The
    resources are always updated; if a history entry exists, the most
    recent one also receives the decision text and the applied effects.
    """
    prefix = "Left_" if is_left_choice else "Right_"

    effects = {}
    for resource in ("Piety", "Stability", "Power", "Wealth"):
        delta = choice.get(f"{prefix}{resource}", 0)
        effects[resource] = delta
        current = self.current_kingdom_state.get(resource, 50)
        self.current_kingdom_state[resource] = max(0, min(100, current + delta))

    # Recording is optional: with no history entry there is nothing to
    # annotate, but the state change above must still take effect.
    if self.choice_history:
        latest = self.choice_history[-1]
        latest["choice_made"] = choice.get(f"{prefix}Choice", "Unknown")
        latest["effects"] = effects
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue