mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-23 16:54:56 +00:00
linting & moved to community
This commit is contained in:
parent
7b194642b3
commit
46892c7bdc
11 changed files with 135 additions and 47 deletions
157
environments/community/mcp_tool_calling/MCP_datasets.py
Normal file
157
environments/community/mcp_tool_calling/MCP_datasets.py
Normal file
|
|
@ -0,0 +1,157 @@
|
|||
import random
|
||||
|
||||
import pandas as pd
|
||||
from datasets import Dataset, load_dataset
|
||||
|
||||
# Load the "train" split of the DeepNLP/mcp-servers dataset into a pandas
# DataFrame. Any failure is reported on stdout and replaced by a three-row
# sample frame so the remainder of the script can still run.
try:
    ds = load_dataset("DeepNLP/mcp-servers")
    train_ds = ds["train"]
    df = train_ds.to_pandas()
except Exception as load_error:
    print(f"Error loading dataset: {load_error}")

    # Stand-in rows used only for demonstration when loading fails.
    sample_names = ["AgentRPC", "Git", "Actors MCP Server"]
    sample_descriptions = [
        "Toggle menu Node.js Go Python",
        "Tools to read, search, and manipulate code",
        "Use 3,000+ pre-built cloud tools",
    ]
    sample_rows = {
        "content_name": sample_names,
        "description": sample_descriptions,
        "subfield": ["MCP SERVER"] * len(sample_names),
        "field": ["AI AGENT"] * len(sample_names),
    }
    df = pd.DataFrame(sample_rows)
|
||||
|
||||
# Sentence skeletons for the generated user queries. Each one is filled with
# str.format using the keyword fields "action", "object" and "tool"; a
# template may use any subset of them.
general_templates = [
    "I need to {action} {object}. Can you help me?",
    "How can I {action} using {tool}?",
    "I'm trying to {action}. What's the best way to do this?",
    "Can you assist me with {action}?",
    "What's the process for {action} with {tool}?",
]

# Hand-written action phrases, keyed by the exact server name as it appears
# in the "content_name" column.
server_specific_actions = {
    # Remote procedure calls / agent communication
    "AgentRPC": [
        "call a remote procedure",
        "establish a connection with a remote server",
        "execute a function on another machine",
        "implement RPC in my application",
        "set up agent communication",
    ],
    # Version control
    "Git": [
        "merge my branch",
        "resolve a merge conflict",
        "check the commit history",
        "revert to a previous commit",
        "create a new branch",
    ],
    # AWS knowledge-base lookups
    "AWS KB Retrieval": [
        "find information about AWS services",
        "query the AWS knowledge base",
        "lookup AWS documentation",
        "get help with AWS configuration",
        "understand AWS pricing",
    ],
    # Flashcards / spaced repetition
    "Anki": [
        "create flashcards for studying",
        "improve my spaced repetition system",
        "organize my study notes",
        "set up a memory training system",
        "track my learning progress",
    ],
    # Graph / multi-model database
    "ArangoDB": [
        "query a graph database",
        "store connected data",
        "implement a multi-model database",
        "perform graph traversals",
        "optimize my database queries",
    ],
}

# Generic action phrases for any server that has no entry in
# server_specific_actions.
default_actions = [
    "connect to a server",
    "use an API",
    "access external data",
    "integrate with a tool",
    "automate a process",
]
|
||||
|
||||
|
||||
# Verbs looked for in a server description. A match is paired with the word
# that follows it to form a candidate action phrase. Order matters: it fixes
# the order of the candidate list that the random choice is drawn from.
_DESCRIPTION_VERBS = (
    "use",
    "toggle",
    "enable",
    "explore",
    "search",
    "read",
    "process",
    "connect",
    "build",
)


def generate_prompt_for_server(server_name, description, rng=None):
    """Generate a contextually appropriate prompt for a given server.

    Candidate actions are first mined from ``description``: the text is
    lower-cased and split on whitespace, and for every known verb that
    appears as a standalone token (first occurrence only) the phrase
    "<verb> <next word>" is recorded. Tokens are not stripped of
    punctuation, so "read," does not count as "read".

    If nothing is mined, the curated list for ``server_name`` in
    ``server_specific_actions`` is used, or ``default_actions`` when the
    server has no curated entry.

    Args:
        server_name: Name of the server; substituted for both the
            ``{object}`` and ``{tool}`` template fields.
        description: Free-text description of the server. Anything that is
            empty or not a ``str`` (e.g. a missing value) is ignored.
        rng: Optional object exposing ``choice(seq)``, such as a seeded
            ``random.Random``, for reproducible output. Defaults to the
            module-level ``random`` generator.

    Returns:
        The filled-in prompt string.
    """
    chooser = random if rng is None else rng

    # Extract potential actions from the description if available.
    actions = []
    if description and isinstance(description, str):
        words = description.lower().split()

        # Record the first position of every token in a single pass so each
        # verb lookup below is a dict hit rather than a fresh list scan.
        first_position = {}
        for position, word in enumerate(words):
            first_position.setdefault(word, position)

        last_index = len(words) - 1
        for verb in _DESCRIPTION_VERBS:
            position = first_position.get(verb)
            # A verb that ends the description has no following word to pair with.
            if position is not None and position < last_index:
                actions.append(f"{verb} {words[position + 1]}")

    # If we couldn't extract any actions, fall back to the predefined ones.
    if not actions:
        actions = server_specific_actions.get(server_name, default_actions)

    # Draw the action before the template so the sequence of random draws
    # stays stable for a given generator state.
    action = chooser.choice(actions)
    template = chooser.choice(general_templates)

    return template.format(action=action, object=server_name, tool=server_name)
|
||||
|
||||
|
||||
# Build one prompt per dataset row (a missing description falls back to an
# empty string) and attach the results as a new "prompt" column.
prompts = [
    generate_prompt_for_server(record["content_name"], record.get("description", ""))
    for _, record in df.iterrows()
]
df["prompt"] = prompts
|
||||
|
||||
# Show the first few rows so the generated prompts can be inspected.
print("\nDataset with Added Prompts:")
preview_columns = ["content_name", "prompt"]
print(df[preview_columns].head())

# Convert the augmented frame back to a Dataset and write it to disk.
modified_ds = Dataset.from_pandas(df)
modified_ds.save_to_disk("./modified_mcp_dataset")

# Tell the user where the data went and how to load it again.
for line in (
    "\nModified dataset saved to ./modified_mcp_dataset",
    "You can load it in your RL environment with:",
    "from datasets import load_from_disk",
    "custom_dataset = load_from_disk('./modified_mcp_dataset')",
):
    print(line)

# Print the first three rows as sample query / tool pairs.
print("\nExample usage in RL environment:")
print("=" * 60)
for _, sample in df.head(3).iterrows():
    print(f"User Query: {sample['prompt']}")
    print(f"Available Tool: {sample['content_name']}")
    print(f"Tool Type: {sample['subfield']}")
    print("-" * 40)
|
||||
Loading…
Add table
Add a link
Reference in a new issue