Added a generated prompt for each MCP server

This commit is contained in:
Aditya Mehta 2025-05-18 17:02:45 -07:00
parent 366f82d5bc
commit e78d2c5bc9

View file

@ -0,0 +1,147 @@
from datasets import load_dataset
import pandas as pd
import random
def _build_demo_frame():
    """Return the three-row stand-in table used when the real dataset cannot be loaded."""
    demo_columns = {
        "content_name": ["AgentRPC", "Git", "Actors MCP Server"],
        "description": [
            "Toggle menu Node.js Go Python",
            "Tools to read, search, and manipulate code",
            "Use 3,000+ pre-built cloud tools",
        ],
        "subfield": ["MCP SERVER"] * 3,
        "field": ["AI AGENT"] * 3,
    }
    return pd.DataFrame(demo_columns)


# Load the train split of the MCP servers dataset into a DataFrame.
# Any failure is reported on stdout and replaced by the demo table so the
# rest of the script can still run.
try:
    ds = load_dataset("DeepNLP/mcp-servers")
    train_ds = ds["train"]
    df = train_ds.to_pandas()
except Exception as load_error:
    print(f"Error loading dataset: {load_error}")
    df = _build_demo_frame()
# Generic prompt templates shared by every server.
# Placeholders filled by str.format: {action} is the chosen action phrase;
# {object} and {tool} both receive the server name.
general_templates = [
    "I need to {action} {object}. Can you help me?",
    "How can I {action} using {tool}?",
    "I'm trying to {action}. What's the best way to do this?",
    "Can you assist me with {action}?",
    "What's the process for {action} with {tool}?",
]
# Hand-written action phrases, keyed by exact server name.
# Consulted only when no action could be mined from a server's description.
server_specific_actions = {
    # Remote procedure calls / agent communication
    "AgentRPC": [
        "call a remote procedure",
        "establish a connection with a remote server",
        "execute a function on another machine",
        "implement RPC in my application",
        "set up agent communication",
    ],
    # Version control
    "Git": [
        "merge my branch",
        "resolve a merge conflict",
        "check the commit history",
        "revert to a previous commit",
        "create a new branch",
    ],
    # AWS knowledge base lookups
    "AWS KB Retrieval": [
        "find information about AWS services",
        "query the AWS knowledge base",
        "lookup AWS documentation",
        "get help with AWS configuration",
        "understand AWS pricing",
    ],
    # Flashcards / spaced repetition
    "Anki": [
        "create flashcards for studying",
        "improve my spaced repetition system",
        "organize my study notes",
        "set up a memory training system",
        "track my learning progress",
    ],
    # Graph / multi-model database
    "ArangoDB": [
        "query a graph database",
        "store connected data",
        "implement a multi-model database",
        "perform graph traversals",
        "optimize my database queries",
    ],
}
# Generic action phrases used for servers that have no entry of their own
# in the per-server table.
default_actions = [
    "connect to a server",
    "use an API",
    "access external data",
    "integrate with a tool",
    "automate a process",
]
def generate_prompt_for_server(server_name, description):
    """Build one randomly templated user prompt for a server.

    Candidate action phrases are first mined from ``description``: the text
    is lower-cased and split on whitespace, and for every known verb that
    appears as a token (first occurrence only) and is not the last token,
    the pair ``"<verb> <next token>"`` becomes a candidate. If nothing is
    mined (including when ``description`` is empty or not a string), the
    entry for ``server_name`` in ``server_specific_actions`` is used, or
    ``default_actions`` when the server has no entry.

    Args:
        server_name: Server name; substituted for both the ``{object}`` and
            ``{tool}`` template placeholders.
        description: Free-text description of the server. Empty or
            non-string values skip the mining step.

    Returns:
        A randomly chosen entry of ``general_templates`` formatted with a
        randomly chosen action phrase and the server name.
    """
    known_verbs = (
        "use", "toggle", "enable", "explore", "search",
        "read", "process", "connect", "build",
    )

    mined = []
    if description and isinstance(description, str):
        # Tokens keep any attached punctuation, so e.g. "read," does not
        # match the verb "read".
        tokens = description.lower().split()
        final_pos = len(tokens) - 1
        for verb in known_verbs:
            try:
                pos = tokens.index(verb)
            except ValueError:
                continue  # verb not present in this description
            if pos < final_pos:
                mined.append(f"{verb} {tokens[pos + 1]}")

    # Fall back to the predefined phrases when mining produced nothing.
    candidates = mined or server_specific_actions.get(server_name, default_actions)

    # Draw the action first, then the template.
    chosen_action = random.choice(candidates)
    chosen_template = random.choice(general_templates)

    return chosen_template.format(
        action=chosen_action,
        object=server_name,
        tool=server_name,
    )
# Generate one prompt per dataset row, in row order
prompts = [
    generate_prompt_for_server(record['content_name'], record.get('description', ''))
    for _, record in df.iterrows()
]

# Attach the prompts as a new column
df['prompt'] = prompts

# Preview the results
print("\nDataset with Added Prompts:")
print(df[['content_name', 'prompt']].head())

# Save the augmented table to disk as a dataset
from datasets import Dataset

modified_ds = Dataset.from_pandas(df)
modified_ds.save_to_disk("./modified_mcp_dataset")

# Report where the data was written and how to load it back
for message in (
    "\nModified dataset saved to ./modified_mcp_dataset",
    "You can load it in your RL environment with:",
    "from datasets import load_from_disk",
    "custom_dataset = load_from_disk('./modified_mcp_dataset')",
):
    print(message)

# Demonstrate how the first three rows would be presented in an RL environment
print("\nExample usage in RL environment:")
print("=" * 60)
section_rule = "-" * 40
for _, sample in df.head(3).iterrows():
    print(f"User Query: {sample['prompt']}")
    print(f"Available Tool: {sample['content_name']}")
    print(f"Tool Type: {sample['subfield']}")
    print(section_rule)