Reorganize community environments - Move lean_proof_env, router_env, and philosophical_rlaif_env.py to environments/community/ - Add comprehensive README for community environments - This organizes community-contributed environments into a dedicated community folder for better maintainability and discoverability

This commit is contained in:
Shannon Sands 2025-05-23 13:31:13 +10:00
parent 945ea30c3a
commit e85a170c34
53 changed files with 85 additions and 0 deletions

View file

@ -0,0 +1 @@
OPENAI_API_KEY="your_openai_api_key_here"

View file

@ -0,0 +1,155 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to add date/other infos into it.
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# Pipfile.lock
# poetry
# Uncomment if you are using poetry.
# poetry.lock
# pdm
# Uncomment if you are using pdm.
# pdm.lock
# .pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static analyzer
.pytype/
# Cython cache files
cython_debug/
# Language Server Protocol
.lsp_boosterik/
.metals/
.classpath
.project
.settings
.tabs
# Router RL Env specific (if any HTML/JSONL outputs are not meant for commit)
# router_rl_env_rollouts.html
# router_rl_env_rollouts.jsonl
# router_rl_env_eval_rollouts.html
# router_rl_env_eval_rollouts.jsonl

View file

@ -0,0 +1,3 @@
# Engine
This directory contains the backend services and AI agents for the Pebble (Stone UI) project.

View file

@ -0,0 +1,58 @@
import logging
import os
from dotenv import load_dotenv
from pathlib import Path
from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions, cli, mcp
from livekit.plugins import deepgram, openai, silero
from livekit.plugins.turn_detector.multilingual import MultilingualModel
# Load shared environment variables from the repository-level .env file.
# Fixed: a duplicate `from dotenv import load_dotenv` import and a second
# load_dotenv() call were removed -- both resolved to the same ../../.env
# (load_dotenv is already imported at the top of this module).
load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env')

logger = logging.getLogger("mcp-agent")
class MyAgent(Agent):
    """Voice-based assistant exposing tools from a remote HTTP MCP server.

    Media pipeline: Silero VAD -> Deepgram STT (nova-3, multilingual) ->
    GPT-4o-mini -> OpenAI TTS ("ash"), with a multilingual turn detector.
    """

    def __init__(self) -> None:
        super().__init__(
            instructions=(
                "You can retrieve data via the MCP server. The interface is voice-based: "
                "accept spoken user queries and respond with synthesized speech."
            ),
            vad=silero.VAD.load(),
            stt=deepgram.STT(model="nova-3", language="multi"),
            llm=openai.LLM(model="gpt-4o-mini"),
            tts=openai.TTS(voice="ash"),
            turn_detection=MultilingualModel(),
            mcp_servers=[
                mcp.MCPServerHTTP(
                    # SECURITY NOTE(review): this URL embeds what appears to be a
                    # per-user access token; it should be loaded from an
                    # environment variable instead of being committed to source
                    # control, and the exposed token should be rotated.
                    url="https://mcp.gumloop.com/gcalendar/cY3bcaFS1qNdeVBnj0XIhnP4FEp2%3Aae99858e75594251bea9e05f32bb99b3",
                    timeout=5,
                    client_session_timeout_seconds=5,
                ),
            ]
        )

    async def on_enter(self):
        # Greet the user as soon as the agent joins the session.
        self.session.generate_reply()
async def entrypoint(ctx: JobContext):
    """Job entrypoint: connect to the room, then run MyAgent in a fresh session."""
    await ctx.connect()

    # Assemble the media pipeline once, then hand it to the session.
    media_stack = dict(
        vad=silero.VAD.load(),
        stt=deepgram.STT(model="nova-3", language="multi"),
        llm=openai.LLM(model="gpt-4o-mini"),
        tts=openai.TTS(voice="ash"),
        turn_detection=MultilingualModel(),
    )
    agent_session = AgentSession(**media_stack)
    await agent_session.start(agent=MyAgent(), room=ctx.room)
if __name__ == "__main__":
    # Launch the LiveKit worker; jobs are dispatched to `entrypoint`.
    worker_options = WorkerOptions(entrypoint_fnc=entrypoint, agent_name="mcp-agent")
    cli.run_app(worker_options)

View file

@ -0,0 +1,147 @@
import os
import logging
import asyncio
from dotenv import load_dotenv
from livekit.agents import mcp
from livekit.agents.llm import ChatContext, ChatMessage, LLM # Removed ChatRole as using strings
from livekit.plugins import openai
logger = logging.getLogger("text-perplexity-agent")
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
load_dotenv(os.path.join(os.path.dirname(__file__), '..', '..', '.env'))
# --- Configure Perplexity MCP Server (as a function to allow async context management) ---
def get_perplexity_mcp_server() -> "mcp.MCPServerStdio | None":
    """Build the Perplexity MCP server config, or return None if unavailable.

    Returns None (with a logged warning) when PERPLEXITY_API_KEY is unset or
    the built MCP server script (dist/index.js) is missing; callers must
    handle the None case.
    """
    # Guard clauses replace the original nested if/else for readability.
    if not os.environ.get("PERPLEXITY_API_KEY"):
        logger.warning("⚠️ PERPLEXITY_API_KEY not set. Perplexity tools will be unavailable.")
        return None

    mcp_script_path = os.path.abspath(os.path.join(
        os.path.dirname(__file__), '..', 'tools', 'mcp', 'perplexity', 'perplexity-ask', 'dist', 'index.js'
    ))
    if not os.path.exists(mcp_script_path):
        # Fixed: redundant backslash escapes (\' ) removed from the f-string;
        # the rendered log message is unchanged.
        logger.error(f"❌ MCP script not found at {mcp_script_path}. Make sure you've run 'npm install && npm run build' in the server directory.")
        logger.warning("⚠️ Perplexity tools will be unavailable.")
        return None

    logger.info(f"📂 Configuring Perplexity MCP server with script: {mcp_script_path}")
    timeout_seconds = 30  # single source of truth for both timeout settings
    return mcp.MCPServerStdio(
        name="PerplexityStdioServer",
        params={
            "command": "node",
            "args": [mcp_script_path],
            "cwd": os.path.dirname(mcp_script_path),
            "env": {"PERPLEXITY_API_KEY": os.environ.get("PERPLEXITY_API_KEY") or ""},
            "client_session_timeout_seconds": timeout_seconds,
        },
        client_session_timeout_seconds=timeout_seconds,
    )
async def run_chat_loop(llm_instance: LLM, p_mcp_server: mcp.MCPServerStdio | None, initial_question: str | None = None) -> None:
    """Run an interactive, text-based chat loop against the LLM.

    Args:
        llm_instance: LLM used to generate responses.
        p_mcp_server: Optional Perplexity MCP server; when present it is passed
            to the LLM so the 'perplexity_ask' tool becomes available.
        initial_question: Optional question asked automatically before the
            interactive loop starts (used as a smoke test by main()).
    """
    chat_context = ChatContext()
    system_prompt = \
"""
You are a specialized assistant for answering general knowledge questions, providing explanations,
and performing web searches using the 'perplexity_ask' tool.
When the user asks for information, facts, or to 'search the web', you are the designated expert.
When calling the 'perplexity_ask' tool, ensure the 'messages' argument is an array containing a single object
with 'role': 'user' and 'content' set to the user's question.
For example: {"messages": [{"role": "user", "content": "What is the capital of France?"}]}
You do not have other tools. Do not try to delegate.
"""
    chat_context.add_message(role="system", content=system_prompt)

    async def process_question(question: str):
        # Append the user turn, stream the assistant reply to stdout, then
        # record the complete reply back into the chat context.
        logger.info(f"You: {question}")
        chat_context.add_message(role="user", content=question)
        full_response = ""
        logger.info("Agent:")
        mcp_servers_to_use = []
        if p_mcp_server:
            # MCPServerStdio is managed by async with in main, so it should be running
            mcp_servers_to_use.append(p_mcp_server)
            logger.info("Perplexity MCP Server is available for this query.")
        try:
            logger.info(f"DEBUG: Type of chat_context: {type(chat_context)}")
            logger.info(f"DEBUG: Attributes of chat_context: {dir(chat_context)}")
            # Pass messages from ChatContext and the list of mcp_servers
            # NOTE(review): `chat_context.messages` and the `messages=` /
            # `mcp_servers=` keyword arguments must match the pinned
            # livekit-agents LLM.chat() signature -- confirm against the
            # installed library version.
            async for chunk in llm_instance.chat(messages=chat_context.messages, mcp_servers=mcp_servers_to_use):
                if chunk.delta.content:
                    print(chunk.delta.content, end="", flush=True)
                    full_response += chunk.delta.content
                if chunk.delta.tool_calls:
                    logger.info(f"\n[Tool call detected: {chunk.delta.tool_calls}]")
        except Exception as e:
            logger.error(f"Error during LLM chat: {e}")
            print(f"Sorry, I encountered an error: {e}")
            return
        print()  # newline after the streamed response
        chat_context.add_message(role="assistant", content=full_response)

    if initial_question:
        await process_question(initial_question)
    while True:
        try:
            # input() blocks; run it in a worker thread to keep the event loop free.
            user_input = await asyncio.to_thread(input, "You: ")
            if user_input.lower() in ["exit", "quit"]:
                logger.info("Exiting chat.")
                break
            if not user_input.strip():
                continue
            await process_question(user_input)
        except KeyboardInterrupt:
            logger.info("\nExiting chat due to interrupt.")
            break
        except EOFError:
            logger.info("\nExiting chat due to EOF.")
            break
async def main():
    """Main entrypoint for the text-based Perplexity agent."""
    logger.info("Starting Text-based Perplexity Agent...")
    llm_instance = openai.LLM(model="gpt-4o")
    p_mcp_server_instance = get_perplexity_mcp_server()
    test_question = "What is the capital of France?"

    if p_mcp_server_instance is None:
        # No Perplexity tooling available -- run the plain chat loop.
        logger.warning("Running chat loop without Perplexity MCP server.")
        await run_chat_loop(llm_instance, None, initial_question=test_question)
    else:
        try:
            logger.info("Perplexity MCP Server instance created. Will be used by LLM if needed.")
            await run_chat_loop(llm_instance, p_mcp_server_instance, initial_question=test_question)
        finally:
            # Always release the MCP subprocess, even if the loop raised.
            logger.info("Closing Perplexity MCP server resources.")
            await p_mcp_server_instance.aclose()

    logger.info("Text-based Perplexity Agent finished.")
if __name__ == "__main__":
    # Pre-flight checks before starting the async chat loop.
    perplexity_key = os.environ.get("PERPLEXITY_API_KEY")
    if not perplexity_key:
        logger.error("🔴 PERPLEXITY_API_KEY is not set in the environment.")
        logger.error("🔴 Please set it in your .env file for the agent to function correctly with Perplexity.")
    else:
        script_path = os.path.abspath(os.path.join(
            os.path.dirname(__file__), '..', 'tools', 'mcp', 'perplexity', 'perplexity-ask', 'dist', 'index.js'
        ))
        if not os.path.exists(script_path):
            logger.error(f"❌ Critical: MCP script not found at {script_path}.")
            logger.error("❌ The agent cannot use Perplexity tools. Please build the MCP server ('npm install && npm run build' in its directory).")
            exit(1)
    asyncio.run(main())

View file

@ -0,0 +1,147 @@
import os
import logging
import asyncio
from dotenv import load_dotenv
from livekit.agents import mcp
from livekit.agents.llm import ChatContext, ChatMessage, LLM # Removed ChatRole as using strings
from livekit.plugins import openai
logger = logging.getLogger("text-perplexity-agent")
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
load_dotenv(os.path.join(os.path.dirname(__file__), '..', '..', '.env'))
# --- Configure Perplexity MCP Server (as a function to allow async context management) ---
def get_perplexity_mcp_server() -> "mcp.MCPServerStdio | None":
    """Build the Perplexity MCP server config, or return None if unavailable.

    Returns None (with a logged warning) when PERPLEXITY_API_KEY is unset or
    the built MCP server script (dist/index.js) is missing; callers must
    handle the None case.
    """
    # Guard clauses replace the original nested if/else for readability.
    if not os.environ.get("PERPLEXITY_API_KEY"):
        logger.warning("⚠️ PERPLEXITY_API_KEY not set. Perplexity tools will be unavailable.")
        return None

    mcp_script_path = os.path.abspath(os.path.join(
        os.path.dirname(__file__), '..', 'tools', 'mcp', 'perplexity', 'perplexity-ask', 'dist', 'index.js'
    ))
    if not os.path.exists(mcp_script_path):
        # Fixed: redundant backslash escapes (\' ) removed from the f-string;
        # the rendered log message is unchanged.
        logger.error(f"❌ MCP script not found at {mcp_script_path}. Make sure you've run 'npm install && npm run build' in the server directory.")
        logger.warning("⚠️ Perplexity tools will be unavailable.")
        return None

    logger.info(f"📂 Configuring Perplexity MCP server with script: {mcp_script_path}")
    timeout_seconds = 30  # single source of truth for both timeout settings
    return mcp.MCPServerStdio(
        name="PerplexityStdioServer",
        params={
            "command": "node",
            "args": [mcp_script_path],
            "cwd": os.path.dirname(mcp_script_path),
            "env": {"PERPLEXITY_API_KEY": os.environ.get("PERPLEXITY_API_KEY") or ""},
            "client_session_timeout_seconds": timeout_seconds,
        },
        client_session_timeout_seconds=timeout_seconds,
    )
async def run_chat_loop(llm_instance: LLM, p_mcp_server: mcp.MCPServerStdio | None, initial_question: str | None = None) -> None:
    """Run an interactive, text-based chat loop against the LLM.

    Args:
        llm_instance: LLM used to generate responses.
        p_mcp_server: Optional Perplexity MCP server; when present it is passed
            to the LLM so the 'perplexity_ask' tool becomes available.
        initial_question: Optional question asked automatically before the
            interactive loop starts (used as a smoke test by main()).
    """
    chat_context = ChatContext()
    system_prompt = \
"""
You are a specialized assistant for answering general knowledge questions, providing explanations,
and performing web searches using the 'perplexity_ask' tool.
When the user asks for information, facts, or to 'search the web', you are the designated expert.
When calling the 'perplexity_ask' tool, ensure the 'messages' argument is an array containing a single object
with 'role': 'user' and 'content' set to the user's question.
For example: {"messages": [{"role": "user", "content": "What is the capital of France?"}]}
You do not have other tools. Do not try to delegate.
"""
    chat_context.add_message(role="system", content=system_prompt)

    async def process_question(question: str):
        # Append the user turn, stream the assistant reply to stdout, then
        # record the complete reply back into the chat context.
        logger.info(f"You: {question}")
        chat_context.add_message(role="user", content=question)
        full_response = ""
        logger.info("Agent:")
        mcp_servers_to_use = []
        if p_mcp_server:
            # MCPServerStdio is managed by async with in main, so it should be running
            mcp_servers_to_use.append(p_mcp_server)
            logger.info("Perplexity MCP Server is available for this query.")
        try:
            logger.info(f"DEBUG: Type of chat_context: {type(chat_context)}")
            logger.info(f"DEBUG: Attributes of chat_context: {dir(chat_context)}")
            # Pass messages from ChatContext and the list of mcp_servers
            # NOTE(review): `chat_context.messages` and the `messages=` /
            # `mcp_servers=` keyword arguments must match the pinned
            # livekit-agents LLM.chat() signature -- confirm against the
            # installed library version.
            async for chunk in llm_instance.chat(messages=chat_context.messages, mcp_servers=mcp_servers_to_use):
                if chunk.delta.content:
                    print(chunk.delta.content, end="", flush=True)
                    full_response += chunk.delta.content
                if chunk.delta.tool_calls:
                    logger.info(f"\n[Tool call detected: {chunk.delta.tool_calls}]")
        except Exception as e:
            logger.error(f"Error during LLM chat: {e}")
            print(f"Sorry, I encountered an error: {e}")
            return
        print()  # newline after the streamed response
        chat_context.add_message(role="assistant", content=full_response)

    if initial_question:
        await process_question(initial_question)
    while True:
        try:
            # input() blocks; run it in a worker thread to keep the event loop free.
            user_input = await asyncio.to_thread(input, "You: ")
            if user_input.lower() in ["exit", "quit"]:
                logger.info("Exiting chat.")
                break
            if not user_input.strip():
                continue
            await process_question(user_input)
        except KeyboardInterrupt:
            logger.info("\nExiting chat due to interrupt.")
            break
        except EOFError:
            logger.info("\nExiting chat due to EOF.")
            break
async def main():
    """Main entrypoint for the text-based Perplexity agent."""
    logger.info("Starting Text-based Perplexity Agent...")
    llm_instance = openai.LLM(model="gpt-4o")
    p_mcp_server_instance = get_perplexity_mcp_server()
    test_question = "What is the capital of France?"

    if p_mcp_server_instance is None:
        # No Perplexity tooling available -- run the plain chat loop.
        logger.warning("Running chat loop without Perplexity MCP server.")
        await run_chat_loop(llm_instance, None, initial_question=test_question)
    else:
        try:
            logger.info("Perplexity MCP Server instance created. Will be used by LLM if needed.")
            await run_chat_loop(llm_instance, p_mcp_server_instance, initial_question=test_question)
        finally:
            # Always release the MCP subprocess, even if the loop raised.
            logger.info("Closing Perplexity MCP server resources.")
            await p_mcp_server_instance.aclose()

    logger.info("Text-based Perplexity Agent finished.")
if __name__ == "__main__":
    # Pre-flight checks before starting the async chat loop.
    perplexity_key = os.environ.get("PERPLEXITY_API_KEY")
    if not perplexity_key:
        logger.error("🔴 PERPLEXITY_API_KEY is not set in the environment.")
        logger.error("🔴 Please set it in your .env file for the agent to function correctly with Perplexity.")
    else:
        script_path = os.path.abspath(os.path.join(
            os.path.dirname(__file__), '..', 'tools', 'mcp', 'perplexity', 'perplexity-ask', 'dist', 'index.js'
        ))
        if not os.path.exists(script_path):
            logger.error(f"❌ Critical: MCP script not found at {script_path}.")
            logger.error("❌ The agent cannot use Perplexity tools. Please build the MCP server ('npm install && npm run build' in its directory).")
            exit(1)
    asyncio.run(main())

View file

@ -0,0 +1,97 @@
import os
import logging # Added logging
from dotenv import load_dotenv
from livekit.agents import JobContext, WorkerOptions, cli # Changed import
from livekit.agents import mcp # Corrected import for mcp
from livekit.agents.llm import ChatChunk, function_tool # Added function_tool for delegate_to_router_agent if it were defined here
from livekit.agents.voice import Agent, AgentSession
from livekit.plugins import deepgram, openai, silero
# Removed: from mcp_client import MCPServerStdio
# Removed: from mcp_client.agent_tools import MCPToolsIntegration
from livekit.plugins.turn_detector.multilingual import MultilingualModel # Added from official example
from livekit.agents import ChatContext, RunContext # Add ChatContext & RunContext import
from typing import Optional, List # Add Optional & List import
from livekit.agents import tts # Corrected import for tts module
from livekit.agents.types import NOT_GIVEN # Corrected import for NOT_GIVEN
from livekit.agents.utils.misc import is_given # Corrected import for is_given
logger = logging.getLogger("agent-math-official") # Added logger

# Path to the MCP calculator server script, resolved relative to this file so
# the agent works regardless of the process's working directory.
mcp_script_path = os.path.abspath(os.path.join(
    os.path.dirname(__file__), '..', 'tools', 'mcp', 'calc', 'calc_server.py'
))
class CalculatorAgent(Agent):
    """A LiveKit agent that uses MCP tools from one or more MCP servers."""

    def __init__(self,
                 chat_ctx: ChatContext,
                 instructions: Optional[str] = None,
                 mcp_servers: Optional[list[mcp.MCPServer]] = None,
                 tts: Optional[tts.TTS] = NOT_GIVEN,
                 tools: Optional[List[function_tool]] = None): # Added tools parameter
        # NOTE(review): the `tts` and `mcp_servers` parameters are accepted but
        # never forwarded to super().__init__() below -- callers passing either
        # will find them silently ignored (the MCP server list is hardcoded).
        # Also, the `tts` parameter name shadows the module-level `tts` import.
        # Confirm whether both should be plumbed through.
        final_instructions = instructions if instructions is not None else \
"""
You are a specialist Math assistant. Your expertise is in solving mathematical problems,
performing calculations, arithmetic, and answering questions about numbers.
You have two calculation tools: 'multiply' and 'add'.
When your current math task is complete, or if the user asks for something not related to math,
you MUST use the 'delegate_to_router_agent' tool to return to the main assistant.
"""
        # Combine passed tools with any class-defined tools if necessary (none here for now)
        all_tools = tools if tools is not None else []
        super().__init__(
            instructions=final_instructions,
            chat_ctx=chat_ctx,
            allow_interruptions=True,
            mcp_servers=[
                # Launch the Python calc MCP server as a stdio subprocess.
                mcp.MCPServerStdio(
                    command="python",
                    args=[mcp_script_path],
                )
                # MODIFIED: Removed chat_ctx=chat_ctx argument
            ],
            tools=all_tools # Pass the tools to the parent Agent class
        )
        # MCP tools are automatically integrated by AgentSession if mcp_servers is configured.
        # No need for MCPToolsIntegration or manually adding tools here.

    async def llm_node(self, chat_ctx, tools, model_settings):
        """Override the llm_node to say a message when a tool call is detected."""
        # Track the first tool call of the turn only; the flag prevents
        # repeating a spoken notice for every subsequent tool-call chunk.
        tool_call_detected = False
        async for chunk in super().llm_node(chat_ctx, tools, model_settings):
            if isinstance(chunk, ChatChunk) and chunk.delta and chunk.delta.tool_calls and not tool_call_detected:
                tool_call_detected = True
                # Example: if self.tts: self.session.say("Working on the math problem.")
                # Currently, Math agent does not say anything here.
            yield chunk

    async def on_enter(self):
        # when the agent is added to the session, we'll initiate the conversation by
        # using the LLM to generate a reply
        self.session.generate_reply()
async def entrypoint(ctx: JobContext):
    """Main entrypoint for the LiveKit agent application."""
    # Connect first, mirroring the official LiveKit example.
    await ctx.connect()

    # Voice pipeline: some components repeat the agent's own defaults, but the
    # official example configures the session the same way.
    voice_pipeline = dict(
        vad=silero.VAD.load(),
        stt=deepgram.STT(model="nova-2", language="en-US"),
        llm=openai.LLM(model="gpt-4o"),
        tts=openai.TTS(voice="alloy"),
        turn_detection=MultilingualModel(),
    )
    session = AgentSession(**voice_pipeline)

    # Reaches into the private _chat_ctx, exactly as the original did.
    math_agent = CalculatorAgent(chat_ctx=session._chat_ctx)
    await session.start(agent=math_agent, room=ctx.room)
if __name__ == "__main__":
    # Hand control to the LiveKit CLI worker runner.
    options = WorkerOptions(entrypoint_fnc=entrypoint)
    cli.run_app(options)

View file

@ -0,0 +1,67 @@
import logging
import os
from dotenv import load_dotenv
from pathlib import Path
from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions, cli, mcp, ChatContext, function_tool, RunContext
from livekit.plugins import deepgram, openai, silero
from livekit.plugins.turn_detector.multilingual import MultilingualModel
from typing import Optional, List
load_dotenv(os.path.join(os.path.dirname(__file__), '..', '..', '.env'))
logger = logging.getLogger("calendar-agent")
class CalendarAgent(Agent):
    """Voice agent specialized in calendar operations via a Gumloop MCP server.

    Calendar tools come from the MCP server configured through the
    GUMLOOP_CALENDAR_MCP_URL environment variable; without it the agent runs
    with no calendar tools.
    """

    def __init__(self,
                 chat_ctx: ChatContext,
                 tools: Optional[List[function_tool]] = None) -> None:
        final_instructions = (
            "You are a Calendar specialist. You can help with scheduling, creating, modifying, or querying calendar events, appointments, and meetings. "
            "Use tools like 'create_calendar_event', 'get_calendar_events', etc., when available. "
            "If your task is complete or the user asks for something outside your calendar capabilities (e.g., math, web search), "
            "you MUST use the 'delegate_to_router_agent' tool to return to the main assistant."
        )
        all_tools = tools if tools is not None else []
        mcp_servers_list = []
        gumloop_mcp_url = os.getenv("GUMLOOP_CALENDAR_MCP_URL")
        if gumloop_mcp_url:
            mcp_servers_list.append(
                mcp.MCPServerHTTP(
                    url=gumloop_mcp_url,
                    timeout=5,
                    client_session_timeout_seconds=5,
                )
            )
        else:
            # Fixed for consistency with the sibling Contact/Gmail agents:
            # surface the misconfiguration instead of silently running with
            # no calendar tools.
            logger.warning("GUMLOOP_CALENDAR_MCP_URL not set. Calendar agent may not have all its tools.")
        super().__init__(
            instructions=final_instructions,
            chat_ctx=chat_ctx,
            allow_interruptions=True,
            tools=all_tools,
            mcp_servers=mcp_servers_list
        )

    async def on_enter(self):
        # Open the conversation with an LLM-generated greeting.
        self.session.generate_reply()
async def entrypoint(ctx: JobContext):
    """Connect to the LiveKit room and run the CalendarAgent in a fresh session."""
    await ctx.connect()
    pipeline = dict(
        vad=silero.VAD.load(),
        stt=deepgram.STT(model="nova-3", language="multi"),
        llm=openai.LLM(model="gpt-4o-mini"),
        tts=openai.TTS(voice="ash"),
        turn_detection=MultilingualModel(),
    )
    session = AgentSession(**pipeline)
    calendar_agent = CalendarAgent(chat_ctx=session._chat_ctx)
    await session.start(agent=calendar_agent, room=ctx.room)


if __name__ == "__main__":
    cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))

View file

@ -0,0 +1,76 @@
import logging
import os
from dotenv import load_dotenv
from pathlib import Path
from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions, cli, mcp, ChatContext, RunContext, function_tool
from livekit.plugins import deepgram, openai, silero
from livekit.plugins.turn_detector.multilingual import MultilingualModel
import random
from typing import Optional, List
load_dotenv(os.path.join(os.path.dirname(__file__), '..', '..', '.env'))
from livekit import api
logger = logging.getLogger("caller-agent")
class CallerAgent(Agent):
    """Voice agent that initiates outbound phone calls via LiveKit agent dispatch.

    The actual telephony work is done by a separate "my-telephony-agent"
    worker, which this agent dispatches into a freshly named outbound room.
    """

    def __init__(self,
                 chat_ctx: ChatContext,
                 tools: Optional[List[function_tool]] = None) -> None:
        final_instructions = (
            "You are a Caller specialist. Your primary function is to initiate phone calls. " +
            "If the user asks to call someone, use the 'make_phone_call' tool. " +
            "Currently, you can only call a predefined contact (Sam at +16467085301). Confirm with the user if they want to call this specific contact. " +
            "If your task is complete or the user asks for something outside your calling capabilities (e.g., math, web search), " +
            "you MUST use the 'delegate_to_router_agent' tool to return to the main assistant."
        )
        # The agent's own tool plus any extra tools supplied by the caller.
        agent_tools = [self.make_phone_call]
        all_tools = agent_tools + (tools if tools is not None else [])
        super().__init__(
            instructions=final_instructions,
            chat_ctx=chat_ctx,
            allow_interruptions=True,
            tools=all_tools
        )
        # NOTE(review): this API client is never closed; consider closing it on
        # agent shutdown to avoid leaking the underlying HTTP session.
        self.lkapi = api.LiveKitAPI()

    async def on_enter(self):
        # Open the conversation with an LLM-generated greeting.
        self.session.generate_reply()

    @function_tool
    async def make_phone_call(self, context: RunContext, phone_number: str):
        """
        Call this function to make a phone call to a user number.
        Args:
            phone_number: The phone number to call.
        """
        # NOTE(review): the `phone_number` argument is currently ignored -- the
        # dispatch metadata hardcodes the predefined contact, which matches the
        # instructions above ("you can only call a predefined contact").
        # Confirm before wiring the parameter through.
        await self.lkapi.agent_dispatch.create_dispatch(
            api.CreateAgentDispatchRequest(
                agent_name="my-telephony-agent",
                # Random 10-digit suffix gives each outbound call a unique room.
                room=f"outbound-{''.join(str(random.randint(0, 9)) for _ in range(10))}",
                metadata='{"phone_number": "+16467085301"}' #HARDCODED
            )
        )
async def entrypoint(ctx: JobContext):
    """Connect to the LiveKit room and run the CallerAgent in a fresh session."""
    await ctx.connect()
    pipeline = dict(
        vad=silero.VAD.load(),
        stt=deepgram.STT(model="nova-3", language="multi"),
        llm=openai.LLM(model="gpt-4o-mini"),
        tts=openai.TTS(voice="ash"),
        turn_detection=MultilingualModel(),
    )
    session = AgentSession(**pipeline)
    await session.start(agent=CallerAgent(chat_ctx=session._chat_ctx), room=ctx.room)


if __name__ == "__main__":
    # NOTE(review): agent_name "mcp-agent" looks copy-pasted from the MCP agent
    # module -- confirm the intended worker name for the caller agent.
    worker_options = WorkerOptions(entrypoint_fnc=entrypoint, agent_name="mcp-agent")
    cli.run_app(worker_options)

View file

@ -0,0 +1,69 @@
import logging
import os
from dotenv import load_dotenv
from pathlib import Path
from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions, cli, mcp, ChatContext, function_tool, RunContext
from livekit.plugins import deepgram, openai, silero
from livekit.plugins.turn_detector.multilingual import MultilingualModel
from typing import Optional, List
load_dotenv(os.path.join(os.path.dirname(__file__), '..', '..', '.env'))
logger = logging.getLogger("contact-agent")
class ContactAgent(Agent):
    """Voice agent specialized in finding and managing contact information.

    Contact tools come from a Zapier MCP server configured through the
    ZAPIER_CONTACT_MCP_URL environment variable.
    """

    def __init__(self,
                 chat_ctx: ChatContext,
                 tools: Optional[List[function_tool]] = None) -> None:
        final_instructions = (
            "You are a Contact specialist. You can help find contact information such as phone numbers, email addresses, or other details for individuals. " +
            "You can also add new contacts or update existing ones if tools like 'get_contact_details', 'add_contact', 'update_contact' are available. " +
            "If your task is complete or the user asks for something outside your contact management capabilities (e.g., math, web search), " +
            "you MUST use the 'delegate_to_router_agent' tool to return to the main assistant."
        )
        zapier_mcp_url = os.getenv("ZAPIER_CONTACT_MCP_URL")
        if zapier_mcp_url:
            mcp_servers_list = [
                mcp.MCPServerHTTP(
                    url=zapier_mcp_url,
                    timeout=5,
                    client_session_timeout_seconds=5,
                )
            ]
        else:
            logger.warning("ZAPIER_CONTACT_MCP_URL not set. Contact agent may not have all its tools.")
            mcp_servers_list = []
        super().__init__(
            instructions=final_instructions,
            chat_ctx=chat_ctx,
            allow_interruptions=True,
            mcp_servers=mcp_servers_list,
            tools=tools if tools is not None else []
        )

    async def on_enter(self):
        # Kick off the conversation with an LLM-generated greeting.
        self.session.generate_reply()
async def entrypoint(ctx: JobContext):
    """Connect to the LiveKit room and run the ContactAgent in a fresh session."""
    await ctx.connect()
    pipeline = dict(
        vad=silero.VAD.load(),
        stt=deepgram.STT(model="nova-3", language="multi"),
        llm=openai.LLM(model="gpt-4o-mini"),
        tts=openai.TTS(voice="alloy"),
        turn_detection=MultilingualModel(),
    )
    session = AgentSession(**pipeline)
    await session.start(agent=ContactAgent(chat_ctx=session._chat_ctx), room=ctx.room)


if __name__ == "__main__":
    cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))

View file

@ -0,0 +1,70 @@
import logging
import os
from dotenv import load_dotenv
from pathlib import Path
from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions, cli, mcp, ChatContext, function_tool, RunContext
from livekit.plugins import deepgram, openai, silero
from livekit.plugins.turn_detector.multilingual import MultilingualModel
from typing import Optional, List
load_dotenv(os.path.join(os.path.dirname(__file__), '..', '..', '.env'))
logger = logging.getLogger("gmail-agent")
class GmailAgent(Agent):
    """Voice agent specialized in reading, searching, sending, and updating email.

    Gmail tools come from a Gumloop MCP server configured through the
    GUMLOOP_GMAIL_MCP_URL environment variable.
    """

    def __init__(self,
                 chat_ctx: ChatContext,
                 tools: Optional[List[function_tool]] = None) -> None:
        final_instructions = (
            "You are a Gmail specialist. You can manage emails by reading, searching, sending, and updating them (e.g., marking as read/unread, moving to folders). " +
            "Use tools like 'read_emails', 'send_email', and 'update_email' to interact with Gmail. " +
            "If sending an email, you might need a recipient; you know Gabin (gabin.fay@gmail.com). " +
            "If your task is complete or the user asks for something outside your email management capabilities (e.g., math, calendar), " +
            "you MUST use the 'delegate_to_router_agent' tool to return to the main assistant."
        )
        gumloop_mcp_url = os.getenv("GUMLOOP_GMAIL_MCP_URL")
        if gumloop_mcp_url:
            mcp_servers_list = [
                mcp.MCPServerHTTP(
                    url=gumloop_mcp_url,
                    timeout=5,
                    client_session_timeout_seconds=5,
                )
            ]
        else:
            logger.warning("GUMLOOP_GMAIL_MCP_URL not set. Gmail agent may not have all its tools.")
            mcp_servers_list = []
        super().__init__(
            instructions=final_instructions,
            chat_ctx=chat_ctx,
            allow_interruptions=True,
            mcp_servers=mcp_servers_list,
            tools=tools if tools is not None else []
        )

    async def on_enter(self):
        # Greet the user via an LLM-generated opening reply.
        self.session.generate_reply()
async def entrypoint(ctx: JobContext):
    """Connect to the LiveKit room and run the GmailAgent in a fresh session."""
    await ctx.connect()
    pipeline = dict(
        vad=silero.VAD.load(),
        stt=deepgram.STT(model="nova-3", language="multi"),
        llm=openai.LLM(model="gpt-4o-mini"),
        tts=openai.TTS(voice="alloy"),
        turn_detection=MultilingualModel(),
    )
    session = AgentSession(**pipeline)
    await session.start(agent=GmailAgent(chat_ctx=session._chat_ctx), room=ctx.room)


if __name__ == "__main__":
    cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))

View file

@ -0,0 +1,143 @@
import os
import logging
import asyncio
from dotenv import load_dotenv
from livekit.agents import JobContext, WorkerOptions, cli, mcp, function_tool, RunContext
from livekit.agents.llm import ChatChunk, ChatContext, ChatMessage
from livekit.agents.voice import Agent, AgentSession
from livekit.plugins import openai, silero, deepgram
from livekit.plugins.turn_detector.multilingual import MultilingualModel
from typing import Optional, List
logger = logging.getLogger("go-agent-livekit")
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# Load environment variables from .env file
load_dotenv()
# Credentials pulled from the environment; missing values are logged below
# rather than raising, so the worker can still start (and fail later).
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
GOOGLE_MAPS_API_KEY = os.environ.get('GOOGLE_MAPS_API_KEY')
DEEPGRAM_API_KEY = os.environ.get('DEEPGRAM_API_KEY')
if not OPENAI_API_KEY:
    logger.critical("🔴 CRITICAL: OPENAI_API_KEY not found. OpenAI plugins will fail.")
if not GOOGLE_MAPS_API_KEY:
    logger.critical("🔴 CRITICAL: GOOGLE_MAPS_API_KEY not found. Google Maps MCP server will fail.")
if not DEEPGRAM_API_KEY:
    logger.warning("⚠️ WARNING: DEEPGRAM_API_KEY not found. Deepgram STT plugin may have issues.")
# Path to the built Google Maps MCP server (Node script), resolved relative to
# this file so it works regardless of the process's working directory.
mcp_script_path = os.path.abspath(os.path.join(
    os.path.dirname(__file__), '..', 'tools', 'mcp', 'google-maps', 'dist', 'index.js'
))
if not os.path.exists(mcp_script_path):
    logger.critical(f"CRITICAL: Google Maps MCP script not found at {mcp_script_path}. Agent cannot start tools.")
class GoAgent(Agent):
    """A LiveKit agent specialized in location-based queries using Google Maps via MCP."""

    def __init__(self,
                 chat_ctx: ChatContext,
                 tools: Optional[List[function_tool]] = None):
        # Detailed tool-usage instructions for the LLM; the maps_* tools are
        # supplied at runtime by the Google Maps MCP server configured below.
        final_instructions = (
            "You are the Go Agent, specialized in providing location-based information using Google Maps. "
            "You MUST use the available tools to fulfill user queries about locations, directions, distances, and places.\n\n"
            "RULE FOR LOCATION REQUESTS: When a user asks about finding a location, getting directions, calculating distances, "
            "or information about a place, you MUST use the appropriate Google Maps tool.\n\n"
            "Key tools available to you (provided by Google Maps MCP):\n"
            "- maps_geocode: Convert an address to coordinates (e.g., maps_geocode address=\"1600 Amphitheatre Parkway, Mountain View, CA\")\n"
            "- maps_reverse_geocode: Convert coordinates to an address (e.g., maps_reverse_geocode latitude=37.422 longitude=-122.084)\n"
            "- maps_search_places: Search for places (e.g., maps_search_places query=\"restaurants in London\")\n"
            "- maps_place_details: Get details for a place_id (e.g., maps_place_details place_id=\"ChIJN1t_tDeuEmsRUsoyG83frY4\")\n"
            "- maps_directions: Get directions (e.g., maps_directions origin=\"San Francisco\" destination=\"Los Angeles\" mode=\"driving\")\n"
            "- maps_distance_matrix: Calculate distances (e.g., maps_distance_matrix origins=\"New York,Washington D.C.\" destinations=\"Boston,Philadelphia\" mode=\"...\")\n\n"
            "RULE FOR TOOL RESULTS: After you receive results from a tool, you MUST analyze the data and provide a clear, "
            "helpful response. Format addresses and directions in a readable way, extract key information from place details, "
            "and always provide context for coordinates and distances.\n\n"
            "If a tool call fails or returns no relevant information, explain clearly to the user and suggest alternatives. "
            "If your task is complete or the user asks for something outside your location/maps capabilities (e.g., math, calendar), "
            "you MUST use the 'delegate_to_router_agent' tool to return to the main assistant."
        )
        all_tools = tools if tools is not None else []
        mcp_servers_list = []
        if GOOGLE_MAPS_API_KEY and os.path.exists(mcp_script_path):
            # Launch the Node-based Google Maps MCP server as a stdio subprocess.
            mcp_servers_list.append(
                mcp.MCPServerStdio(
                    command='node',
                    args=[mcp_script_path],
                    env={'GOOGLE_MAPS_API_KEY': GOOGLE_MAPS_API_KEY}
                )
            )
        else:
            logger.warning("Google Maps MCP server not configured due to missing API key or script path.")
        super().__init__(
            instructions=final_instructions,
            allow_interruptions=True,
            chat_ctx=chat_ctx,
            mcp_servers=mcp_servers_list,
            tools=all_tools
        )
        if not self.llm:
            # NOTE(review): assumes the Agent base class exposes an `llm`
            # attribute even when no LLM plugin was passed to this agent --
            # confirm against the installed livekit-agents version.
            logger.error("GoAgentLivekit initialized, but LLM might be missing if API key was not provided to plugin.")

    async def llm_node(self, chat_ctx: ChatContext, tools: list, model_settings: dict):
        """Override the llm_node to log tool calls or add custom behavior."""
        # Announce only the first tool call of each turn so the user isn't left
        # waiting in silence while the tool runs.
        tool_call_detected_this_turn = False
        async for chunk in super().llm_node(chat_ctx, tools, model_settings):
            if isinstance(chunk, ChatChunk) and chunk.delta and chunk.delta.tool_calls and not tool_call_detected_this_turn:
                tool_call_detected_this_turn = True
                logger.info("GoAgentLivekit: LLM is attempting to call a tool. Informing user.")
                if hasattr(self, 'session') and self.session is not None:
                    self.session.say("Okay, let me check that for you.")
                else:
                    logger.warning("Agent has no session to 'say' through during tool call detection.")
            yield chunk

    async def on_enter(self):
        # when the agent is added to the session, we'll initiate the conversation by
        # using the LLM to generate a reply
        self.session.generate_reply()
async def entrypoint(ctx: JobContext):
    """Main entrypoint for the LiveKit Go Agent application."""
    logger.info(f"Go Agent LiveKit starting entrypoint for Job ID: {getattr(ctx.job, 'id', 'unknown')}")
    await ctx.connect()
    room_name = ctx.room.name if ctx.room else 'N/A'
    logger.info(f"Successfully connected to LiveKit room: {room_name}")
    # Full voice pipeline: Silero VAD -> Deepgram STT -> OpenAI LLM -> OpenAI TTS.
    session = AgentSession(
        vad=silero.VAD.load(),
        stt=deepgram.STT(model="nova-2", language="en-US", api_key=os.environ.get('DEEPGRAM_API_KEY')),
        llm=openai.LLM(model="gpt-4o", api_key=OPENAI_API_KEY),
        tts=openai.TTS(voice="alloy", api_key=OPENAI_API_KEY),
        turn_detection=MultilingualModel(),
    )
    logger.info("AgentSession configured with Google Maps MCP server.")
    agent = GoAgent(chat_ctx=session._chat_ctx)
    logger.info("GoAgentLivekit instantiated.")
    logger.info(f"Starting AgentSession with agent for room: {room_name}")
    await session.start(agent=agent, room=ctx.room)
    logger.info("AgentSession started. GoAgentLivekit is now running.")
if __name__ == "__main__":
    # Entry point when launched directly as a LiveKit worker process.
    logger.info("Starting Go Agent LiveKit application via cli.run_app.")
    # The Deepgram STT plugin is constructed with this key at session start;
    # warn early rather than failing later inside the session.
    if not os.environ.get('DEEPGRAM_API_KEY'):
        logger.warning("DEEPGRAM_API_KEY not found in environment. STT plugin may fail.")
    cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))

View file

@ -0,0 +1,152 @@
import os
import sys
import logging
from dotenv import load_dotenv
from livekit.agents import JobContext, WorkerOptions, cli, function_tool, RunContext
from livekit.agents import mcp
from livekit.agents.llm import ChatChunk
from livekit.agents.voice import Agent, AgentSession
from livekit.plugins import deepgram, openai, silero
from livekit.plugins.turn_detector.multilingual import MultilingualModel
from typing import Optional, List
from livekit.agents import tts
from livekit.agents.types import NOT_GIVEN
from livekit.agents.utils.misc import is_given
from pydantic import BaseModel, Field
from livekit.agents import ChatContext
logger = logging.getLogger("agent-spotify-official")
# Load environment variables from the repo-root .env (two levels up).
load_dotenv(os.path.join(os.path.dirname(__file__), '..', '..', '.env'))
# Make the project root importable so `engine.config` below resolves when this
# file is run as a standalone worker script.
_this_file_dir = os.path.dirname(os.path.abspath(__file__))
_stone_ui_dir = os.path.abspath(os.path.join(_this_file_dir, '..', '..'))
if _stone_ui_dir not in sys.path:
    sys.path.insert(0, _stone_ui_dir)
# Removed ANTHROPIC_API_KEY check as it seems unrelated to this OpenAI-based agent.
from engine.config import settings
# --- Spotify Tool Input Models (Based on spotify-mcp-server README) ---
class PlayMusicInput(BaseModel):
    """Arguments for the Spotify MCP 'playMusic' tool (mirrors the server's input schema)."""
    uri: Optional[str] = Field(None, description="Spotify URI of the item to play (e.g., spotify:track:...). Overrides type and id.")
    type: Optional[str] = Field(None, description="Type of item to play (track, album, artist, playlist)")
    id: Optional[str] = Field(None, description="Spotify ID of the item to play")
    deviceId: Optional[str] = Field(None, description="ID of the device to play on (optional)")
# Add other input models here as needed (e.g., SearchSpotifyInput, PlaylistInput etc.)
# --- Configure Spotify MCP Server ---
# Module-level setup: locates the built Node MCP server and its auth config,
# and leaves `spotify_mcp_server` as either a configured MCPServerStdio or None
# (ListenAgent checks for None and simply runs without Spotify tools).
spotify_mcp_server = None
# Define the path to the BUILT MCP server script
# IMPORTANT: Ensure the MCP server is built (npm run build) and authenticated (npm run auth)
mcp_script_path = os.path.abspath(os.path.join(
    os.path.dirname(__file__), '..', 'tools', 'mcp', 'spotify', 'build', 'index.js'
))
spotify_config_path = os.path.abspath(os.path.join(
    os.path.dirname(__file__), '..', 'tools', 'mcp', 'spotify', 'spotify-config.json'
))
if not os.path.exists(mcp_script_path):
    logger.error(f"❌ Spotify MCP script not found at {mcp_script_path}. Make sure you've run 'npm install && npm run build' in the server directory.")
    logger.warning("⚠️ Spotify tools will be unavailable.")
elif not os.path.exists(spotify_config_path):
    logger.error(f"❌ Spotify config file not found at {spotify_config_path}. Make sure you've run 'npm run auth' after setting credentials.")
    logger.warning("⚠️ Spotify tools will likely be unavailable due to missing auth.")
else:
    # Check if config contains tokens (basic check)
    try:
        with open(spotify_config_path, 'r') as f:
            config_content = f.read()
        # NOTE(review): the 'run-npm auth' substring test looks like a typo for
        # 'npm run auth' (the phrase used in the warning below) — confirm against
        # the placeholder text the server template actually writes.
        if 'accessToken' not in config_content or 'refreshToken' not in config_content or 'run-npm auth' in config_content:
            logger.warning(f"⚠️ Spotify config file at {spotify_config_path} seems incomplete or unauthenticated. Run 'npm run auth'.")
            # We still configure the server, but it might fail at runtime
        else:
            logger.info("✅ Spotify config file seems authenticated.")
    except Exception as e:
        logger.error(f"Error reading Spotify config {spotify_config_path}: {e}")
    logger.info(f"📂 Configuring Spotify MCP server with script: {mcp_script_path}")
    spotify_mcp_server = mcp.MCPServerStdio(
        'node',  # Command to run the server
        args=[mcp_script_path],  # Argument is the script path
        # No specific env vars needed here, reads from spotify-config.json
        env={},
        client_session_timeout_seconds=5*60
    )
    logger.info("✅ Spotify MCP Server configured (runtime auth check still needed).")
class ListenAgent(Agent):
    """A LiveKit agent that uses MCP tools from one or more MCP servers.

    Specializes in Spotify playback control via the Spotify MCP server, when
    that server was successfully configured at module load.
    """
    def __init__(self,
                 chat_ctx: ChatContext,
                 instructions: Optional[str] = None,
                 tts: Optional[tts.TTS] = NOT_GIVEN,
                 tools: Optional[List[function_tool]] = None):
        """Build the Spotify specialist agent.

        Args:
            chat_ctx: Conversation context shared with the session.
            instructions: Optional system-prompt override; defaults to the
                Spotify-specialist prompt below.
            tts: Optional TTS override for this agent.
            tools: Extra function tools (e.g. delegate_to_router_agent).
        """
        final_instructions = instructions if instructions is not None else \
            ("You are the Listen Agent, specialized in controlling Spotify music playback. " +
             "You MUST use the available tools to fulfill user requests related to Spotify. " +
             "Available tools include 'playMusic', and potentially others like 'searchSpotify', 'pausePlayback', etc.\n\n" +
             "RULE FOR MUSIC REQUESTS: When a user asks to play music, search for music, control playback (pause, skip, etc.), " +
             "manage playlists, or ask what's playing, you MUST use the appropriate Spotify tool (like 'playMusic'). " +
             "Be precise with parameters like 'uri' or 'type' and 'id'. Infer parameters from the user query. If essential info is missing (like what to play), ask the user.\n\n" +
             "RULE FOR TOOL RESULTS: After a tool is successfully executed, you MUST confirm the action to the user (e.g., 'Okay, playing \'Bohemian Rhapsody\' now.'). " +
             "If a tool fails or returns an error, inform the user clearly. " +
             "If your task is complete or the user asks for something outside your Spotify capabilities (e.g., math, calendar), " +
             "you MUST use the 'delegate_to_router_agent' tool to return to the main assistant."
            )
        all_tools = tools if tools is not None else []
        # Only attach the Spotify MCP server if module-level setup succeeded.
        active_mcp_servers = []
        if spotify_mcp_server is not None:
            active_mcp_servers.append(spotify_mcp_server)
        super().__init__(
            instructions=final_instructions,
            chat_ctx=chat_ctx,
            allow_interruptions=True,
            # FIX: the `tts` parameter was accepted but never forwarded, so a
            # caller-supplied TTS was silently ignored and the `self.tts` check
            # in llm_node never reflected the override.
            tts=tts,
            mcp_servers=active_mcp_servers,
            tools=all_tools  # Pass the tools to the parent Agent class
        )
    async def llm_node(self, chat_ctx, tools, model_settings):
        """Override the llm_node to say a message when a tool call is detected."""
        tool_call_detected = False
        async for chunk in super().llm_node(chat_ctx, tools, model_settings):
            if isinstance(chunk, ChatChunk) and chunk.delta and chunk.delta.tool_calls and not tool_call_detected:
                tool_call_detected = True
                # Use self.session.say() to make the agent speak, only if TTS is configured
                if self.tts:  # Check if the agent has a TTS instance
                    self.session.say("Sure, I'll check that for you.")
            yield chunk
    async def on_enter(self):
        """Open the conversation once the agent joins the session."""
        self.session.generate_reply()
async def entrypoint(ctx: JobContext):
    """Main entrypoint for the LiveKit agent application."""
    await ctx.connect()
    # Assemble the voice pipeline, then hand the session to the ListenAgent.
    pipeline = dict(
        vad=silero.VAD.load(),
        stt=deepgram.STT(model="nova-2", language="en-US"),
        llm=openai.LLM(model="gpt-4o"),
        tts=openai.TTS(voice="alloy"),
        turn_detection=MultilingualModel(),
    )
    session = AgentSession(**pipeline)
    agent = ListenAgent(chat_ctx=session._chat_ctx)
    await session.start(agent=agent, room=ctx.room)
if __name__ == "__main__":
    # Run this agent as a standalone LiveKit worker.
    cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))

View file

@ -0,0 +1,220 @@
import asyncio
import logging
import os
from typing import List, Dict, Any, Annotated
from pathlib import Path
import aiohttp
from dotenv import load_dotenv
from livekit.agents import (
JobContext,
JobProcess,
WorkerOptions,
cli,
llm,
Agent,
AgentSession
)
from livekit import rtc, api
from livekit.plugins import deepgram, openai, silero
from mem0 import AsyncMemoryClient
# Load environment variables
load_dotenv(dotenv_path=Path(__file__).parent.parent.parent / '.env')
# Configure logging
logger = logging.getLogger("memory-assistant")
logger.setLevel(logging.INFO)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Define a global user ID for simplicity
USER_ID = "voice_user"
# Initialize Mem0 memory client
# (presumably reads its API key from the environment loaded above — TODO confirm)
mem0 = AsyncMemoryClient()
async def _enrich_with_memory(last_user_msg: llm.ChatMessage, chat_ctx_to_modify: llm.ChatContext):
    """Persist the user's message to Mem0 and inject recalled memories.

    Stores *last_user_msg* in the Mem0 store, searches for related memories,
    and — when any are found — inserts a system "RAG" message just before
    *last_user_msg* inside ``chat_ctx_to_modify.items`` so the LLM sees the
    recalled context ahead of the current query. Mutates the context in place;
    Mem0 add/search failures are logged and treated as best-effort.
    """
    if not last_user_msg or not last_user_msg.text_content or not last_user_msg.text_content.strip():
        logger.info("No valid last user message content to process for memory.")
        return
    try:
        # Ensure last_user_msg.text_content is a string for mem0
        content_str = last_user_msg.text_content
        if not content_str or not content_str.strip():
            logger.info("User message content is empty after getting text_content.")
            return
        logger.info(f"[Mem0] Attempting to add memory for USER_ID '{USER_ID}': '{content_str}'")
        try:
            add_response = await mem0.add(
                [{"role": "user", "content": content_str}],
                user_id=USER_ID
            )
            logger.info(f"[Mem0] Successfully added memory. Response: {add_response}")
        except Exception as e:
            logger.error(f"[Mem0] Error adding memory: {e}", exc_info=True)
            # Decide if we should return or continue to search with potentially stale memory
            # For now, we'll continue to search.
        logger.info(f"[Mem0] Attempting to search memories for USER_ID '{USER_ID}' with query: '{content_str}'")
        results = []
        try:
            results = await mem0.search(
                content_str,
                user_id=USER_ID,
            )
            logger.info(f"[Mem0] Search complete. Found {len(results)} results: {results}")
        except Exception as e:
            logger.error(f"[Mem0] Error searching memory: {e}", exc_info=True)
        if results:
            memories_text = ' '.join([result["memory"] for result in results if result.get("memory")])
            if memories_text.strip():
                logger.info(f"Enriching with memory: {memories_text}")
                # Create the RAG message. Ensure content is a list of ChatContent (string is fine).
                # NOTE(review): "\\n" produces a literal backslash-n in the prompt text,
                # not a newline — confirm whether a real "\n" was intended.
                rag_msg_content = f"Relevant Memory from past interactions: {memories_text}\\nUser's current query is below."
                rag_msg = llm.ChatMessage(role="system", content=[rag_msg_content])
                # Insert RAG message before the last user message in the context's items list
                inserted = False
                # Access items via the .items property
                target_items_list = chat_ctx_to_modify.items
                for i in range(len(target_items_list) - 1, -1, -1):
                    if target_items_list[i] is last_user_msg: # Check object identity
                        target_items_list.insert(i, rag_msg)
                        inserted = True
                        logger.info(f"Inserted RAG message at index {i} in .items list")
                        break
                if not inserted:
                    logger.warning("Could not find last user message by identity in .items list. Appending RAG message.")
                    # NOTE(review): the loop above already compared every item (including
                    # the last one) by identity, so this first branch can never be True;
                    # effectively only the append below can execute here.
                    if target_items_list and target_items_list[-1] is last_user_msg:
                        target_items_list.insert(len(target_items_list)-1, rag_msg)
                    else:
                        target_items_list.append(rag_msg)
    except Exception as e:
        logger.error(f"Error during memory enrichment: {e}", exc_info=True)
class MemoryAgent(Agent):
    """Voice assistant agent whose per-turn context is enriched with Mem0 memories."""
    def __init__(self, chat_ctx: llm.ChatContext):
        super().__init__(
            chat_ctx=chat_ctx,
            instructions="You are a helpful voice assistant that can remember past interactions."
        )
        # System prompt is now managed by the chat_ctx passed to super().__init__
    async def on_enter(self):
        """Greet the user once on join, then start the reply loop."""
        logger.info("MemoryAgent entered room.")
        try:
            # Say initial greeting
            await self.session.say(
                "Hello! I'm George. Can I help you plan an upcoming trip? ",
                allow_interruptions=True
            )
            # Start the main interaction loop
            # NOTE(review): unlike say() above, generate_reply() is not awaited here —
            # confirm fire-and-forget is intended.
            self.session.generate_reply()
            logger.info("MemoryAgent started generate_reply loop.")
        except Exception as e:
            logger.error(f"Error in MemoryAgent.on_enter: {e}", exc_info=True)
    async def on_user_turn_completed(self, turn_ctx: llm.ChatContext, new_message: llm.ChatMessage):
        """Per-turn hook: append the new user message to *turn_ctx* and enrich it
        with Mem0 memories before the LLM call uses it."""
        logger.info(f"MemoryAgent.on_user_turn_completed called with new_message: '{new_message.text_content}'")
        if not new_message or not new_message.content or not new_message.text_content.strip():
            logger.info("No valid new_message content for memory enrichment.")
            return
        # The turn_ctx provided by the hook is the context *before* the new_message.
        # We need to add the new_message to it before enrichment,
        # so _enrich_with_memory can potentially place the RAG message *before* it.
        # The AgentActivity will use this modified turn_ctx for the LLM call.
        # It will also separately add the new_message to the agent's main context.
        # Let's make a working copy if direct modification isn't intended for the passed turn_ctx,
        # though the name temp_mutable_chat_ctx in AgentActivity suggests it's okay.
        # For safety and clarity in _enrich_with_memory, we'll operate on turn_ctx.
        # Add the new user message to the context that will be enriched
        turn_ctx.items.append(new_message)  # new_message is already part of the main context by AgentActivity
                                            # but for _enrich_with_memory to find it (as last_user_msg)
                                            # and insert RAG before it in *this specific context copy*, it needs to be here.
                                            # AgentActivity also adds this new_message to the agent's _chat_ctx separately.
        logger.info(f"Context before enrichment (with new_message added): {turn_ctx.items}")
        # Enrich the context (which now includes new_message) with memories
        # _enrich_with_memory will find new_message as the last user message
        # and insert the RAG system message just before it in turn_ctx.items
        await _enrich_with_memory(new_message, turn_ctx)
        logger.info(f"Context after enrichment: {turn_ctx.items}")
        # No need to call self.update_chat_ctx() here.
        # The AgentActivity will use the modified turn_ctx for the LLM.
def prewarm_process(proc: JobProcess):
    """Load the Silero VAD model into process memory ahead of time so that
    individual agent sessions start faster."""
    logger.info("Prewarming VAD model.")
    proc.userdata["vad"] = silero.VAD.load()
    logger.info("VAD model prewarmed.")
async def entrypoint(ctx: JobContext):
    """Worker entrypoint: connect, build the memory-enabled assistant, and run it."""
    logger.info("Agent entrypoint started.")
    try:
        await ctx.connect()
        logger.info("Connected to LiveKit room.")
        # Define initial system context for the LLM
        initial_ctx = llm.ChatContext()
        system_prompt_text = (
            """
            You are a helpful voice assistant.
            You are a travel guide named George and will help the user to plan a travel trip of their dreams.
            You should help the user plan for various adventures like work retreats, family vacations or solo backpacking trips.
            You should be careful to not suggest anything that would be dangerous, illegal or inappropriate.
            You can remember past interactions and use them to inform your answers.
            Use semantic memory retrieval to provide contextually relevant responses.
            When relevant memory is provided, use it to enhance your response.
            """
        )
        initial_ctx.add_message(role="system", content=system_prompt_text)
        logger.info("Initial system context defined.")
        # VAD model loading logic remains the same
        # Prefer the VAD instance prewarmed by prewarm_process; fall back to a
        # fresh load if this process wasn't prewarmed.
        vad_model = ctx.proc.userdata.get("vad")
        if not vad_model:
            logger.info("VAD not prewarmed or not found in userdata, loading now.")
            vad_model = silero.VAD.load()
        else:
            logger.info("Using prewarmed VAD model.")
        custom_agent = MemoryAgent(chat_ctx=initial_ctx)
        # AgentSession constructor does NOT take 'agent'
        session = AgentSession(
            vad=vad_model,
            stt=deepgram.STT(model="nova-2", language="en"),
            llm=openai.LLM(model="gpt-4o-mini"),
            tts=openai.TTS(voice="alloy"),
        )
        logger.info("AgentSession created.")
        # Agent is passed to session.start()
        await session.start(agent=custom_agent, room=ctx.room)
        logger.info("Agent session started with MemoryAgent.")
    except Exception as e:
        logger.error(f"Error in agent entrypoint: {e}", exc_info=True)
# Run the application
if __name__ == "__main__":
    # prewarm_fnc loads the VAD model once per worker process before jobs arrive.
    cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint,
                              prewarm_fnc=prewarm_process,
                              agent_name="mem0-voice-agent"))  # Consistent agent name

View file

@ -0,0 +1,257 @@
import sys # Import sys for sys.exit
import logging
import os
from pathlib import Path
from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions, cli, mcp
from livekit.plugins import deepgram, openai, silero
from livekit.plugins.turn_detector.multilingual import MultilingualModel
from dotenv import load_dotenv
load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env')
logger = logging.getLogger("stone-agent")
from livekit.agents import (
Agent,
AgentSession,
ChatContext,
JobContext,
WorkerOptions,
cli,
mcp,
RunContext,
function_tool,
)
from livekit.agents.voice.agent import ModelSettings # Import ModelSettings
from livekit.agents.llm import (
ChatRole,
LLM,
ChatMessage
)
from livekit.plugins import deepgram, openai, silero, anthropic
from livekit.plugins.turn_detector.multilingual import MultilingualModel
from typing import List, Optional # Ensure List and Optional are imported for tool type hints
# Import the original FunctionAgents from the official agent files
# These files should be in the same directory as router_agent.py
from ask_agent import AskAgent
from calc_agent import CalculatorAgent
from calendar_agent import CalendarAgent
from caller_agent import CallerAgent
from contact_agent import ContactAgent
from gmail_agent import GmailAgent
from go_agent import GoAgent
from listen_agent import ListenAgent
# from mem_agent import MemoryAgent
logger = logging.getLogger("router-agent")
# NOTE(review): load_dotenv() was already called earlier with an explicit path;
# this second argument-less call is redundant unless a CWD-local .env is
# deliberately layered on top — confirm which location is authoritative.
load_dotenv()
# Determine the absolute path for server scripts relative to this file
_current_dir = os.path.dirname(os.path.abspath(__file__))
@function_tool
# NOTE: the docstring below doubles as the tool description exposed to the LLM —
# edit it only with prompt behavior in mind.
async def delegate_to_router_agent(context: RunContext, original_query: str = "User wants to talk about something else."):
    """
    Call this function to delegate the conversation back to the main RouterAgent.
    This is used when your current task is complete, or the user asks for functionality
    that you (the specialist agent) do not provide.
    Args:
        original_query: A brief description of why the delegation is happening or the user's last relevant query.
    """
    logger.info(f"Specialist Agent: Delegating back to RouterAgent. Reason/Query: '{original_query}'")
    # Try to access _chat_ctx via context.session, as context.agent was problematic
    if not hasattr(context, 'session') or context.session is None:
        logger.error("delegate_to_router_agent: RunContext does not have a valid 'session' attribute.")
        # This is a critical failure for context propagation.
        # Depending on desired behavior, could raise an error or attempt a recovery (though recovery is hard here).
        # For now, we'll let it fail if it tries to access _chat_ctx on a None session,
        # or re-raise a more specific error.
        raise AttributeError("RunContext is missing the session attribute, cannot retrieve ChatContext.")
    # Returning (new_agent, spoken_message) hands the session to the new agent
    # and speaks the message to the user during the handoff.
    return RouterAgent(chat_ctx=context.session._chat_ctx), "Okay, let me switch you back to the main assistant."
class RouterAgent(Agent):
    """Routes user queries to specialized agents."""
    # Each delegate_* tool below returns a (new_agent, spoken_message) tuple:
    # LiveKit hands the session to the new agent and speaks the message during
    # the handoff. Every specialist receives delegate_to_router_agent so it can
    # hand control back. NOTE: tool docstrings are surfaced to the LLM as tool
    # descriptions — treat them as prompt material.
    def __init__(self, chat_ctx: ChatContext):
        super().__init__(
            instructions="""
            You are a router agent. Your primary responsibility is to understand the user's voice query
            and delegate it to the most appropriate specialist agent.
            - If the query is primarily about mathematics, calculations, arithmetic, or numbers,
              you MUST use the 'delegate_to_math_agent' tool.
            - For general knowledge questions, facts, explanations, requests to 'search the web', 'make a web search',
              or any other type of query not strictly mathematical, not about specific addresses/locations, and not covered by other specialists,
              you MUST use the 'delegate_to_perplexity_agent' tool.
            - If the query involves calendar events, scheduling, creating appointments, or asking about your schedule,
              you MUST use the 'delegate_to_calendar_agent' tool.
            - If the user explicitly asks to make a phone call,
              you MUST use the 'delegate_to_caller_agent' tool.
            - If the query is about finding contact information (like phone numbers or email addresses of people),
              you MUST use the 'delegate_to_contact_agent' tool.
            - For tasks related to managing emails (reading, sending, searching Gmail),
              you MUST use the 'delegate_to_gmail_agent' tool.
            - If the query is about locations, finding places, getting directions, looking up addresses, or anything map-related,
              you MUST use the 'delegate_to_go_agent' tool.
            - If the user wants to play music, control music playback, or anything related to Spotify,
              you MUST use the 'delegate_to_listen_agent' tool.
            Listen carefully to the user's query and make a clear decision.
            Do not attempt to answer the question yourself. Your sole job is to route.
            If uncertain, you can ask one clarifying question to determine the correct agent, but prefer to route directly if possible.
            """,
            allow_interruptions=True,
            chat_ctx=chat_ctx
        )
    async def on_enter(self):
        """Called when the RouterAgent starts. It will wait for user input."""
        logger.info("RouterAgent entered. Waiting for user query.")
        self.session.generate_reply()
    @function_tool
    async def delegate_to_math_agent(self, query: str):
        """
        Call this function to delegate a math-related query to the MathSpecialistAgent.
        Args:
            query: The user's original voice query that is mathematical in nature.
        """
        logger.info(f"RouterAgent: Delegating to MathSpecialistAgent for query: '{query}'")
        # Pass the delegate_to_router_agent tool to the CalculatorAgent
        math_agent = CalculatorAgent(
            chat_ctx=self.session._chat_ctx,
            tools=[delegate_to_router_agent]  # Pass the tool
        )
        return math_agent, "Okay, I'll connect you with my math specialist for that."
    @function_tool
    async def delegate_to_perplexity_agent(self, query: str):
        """
        Call this function to delegate a query that needs to perform a web search to the Perplexity Agent.
        Args:
            query: The user's original voice query.
        """
        logger.info(f"RouterAgent: Delegating to AskAgent (for perplexity tasks) for query: '{query}'")
        try:
            perplexity_agent = AskAgent(
                chat_ctx=self.session._chat_ctx,
                tools=[delegate_to_router_agent]  # Pass the tool
            )
            return perplexity_agent, "Alright, let me get my knowledge expert to help with that question."
        except AttributeError as e:
            logger.error(f"Unexpected AttributeError: {e}")
            raise
    @function_tool
    async def delegate_to_calendar_agent(self, query: str):
        """
        Call this function to delegate a query about calendar events, scheduling, or appointments to the CalendarAgent.
        Args:
            query: The user's original voice query related to calendar.
        """
        logger.info(f"RouterAgent: Delegating to CalendarAgent for query: '{query}'")
        calendar_agent = CalendarAgent(
            chat_ctx=self.session._chat_ctx,
            tools=[delegate_to_router_agent]  # Pass the tool
        )
        return calendar_agent, "Okay, let me check your calendar."
    @function_tool
    async def delegate_to_caller_agent(self, query: str):
        """
        Call this function to delegate a request to make a phone call to the CallerAgent.
        Args:
            query: The user's original voice query about making a call.
        """
        logger.info(f"RouterAgent: Delegating to CallerAgent for query: '{query}'")
        caller_agent = CallerAgent(
            chat_ctx=self.session._chat_ctx,
            tools=[delegate_to_router_agent]  # Pass the tool
        )
        return caller_agent, "Sure, I can try to make that call for you."
    @function_tool
    async def delegate_to_contact_agent(self, query: str):
        """
        Call this function to delegate a query about finding or managing contact information to the ContactAgent.
        Args:
            query: The user's original voice query related to contacts.
        """
        logger.info(f"RouterAgent: Delegating to ContactAgent for query: '{query}'")
        contact_agent = ContactAgent(
            chat_ctx=self.session._chat_ctx,
            tools=[delegate_to_router_agent]  # Pass the tool
        )
        return contact_agent, "Let me look up that contact information for you."
    @function_tool
    async def delegate_to_gmail_agent(self, query: str):
        """
        Call this function to delegate an email-related query (reading, sending, managing emails) to the GmailAgent.
        Args:
            query: The user's original voice query related to Gmail.
        """
        logger.info(f"RouterAgent: Delegating to GmailAgent for query: '{query}'")
        gmail_agent = GmailAgent(
            chat_ctx=self.session._chat_ctx,
            tools=[delegate_to_router_agent]  # Pass the tool
        )
        return gmail_agent, "Okay, I'll check your emails."
    @function_tool
    async def delegate_to_go_agent(self, query: str):
        """
        Call this function to delegate a query about locations, directions, maps, or places to the GoAgent.
        Args:
            query: The user's original voice query related to maps or navigation.
        """
        logger.info(f"RouterAgent: Delegating to GoAgent for query: '{query}'")
        go_agent = GoAgent(
            chat_ctx=self.session._chat_ctx,
            tools=[delegate_to_router_agent]  # Pass the tool
        )
        return go_agent, "Let me get my navigation expert for that."
    @function_tool
    async def delegate_to_listen_agent(self, query: str):
        """
        Call this function to delegate a request to play or control music (Spotify) to the ListenAgent.
        Args:
            query: The user's original voice query related to music or Spotify.
        """
        logger.info(f"RouterAgent: Delegating to ListenAgent for query: '{query}'")
        listen_agent = ListenAgent(
            chat_ctx=self.session._chat_ctx,
            tools=[delegate_to_router_agent]  # Pass the tool
        )
        return listen_agent, "Okay, let's get some music playing."
async def entrypoint(ctx: JobContext):
    """Main entrypoint for the multi-agent LiveKit application."""
    await ctx.connect()
    logger.info("Router agent connected to LiveKit.")
    # Shared session pipeline: OpenAI STT/LLM/TTS, Silero VAD, multilingual turn detection.
    session = AgentSession[None](
        vad=silero.VAD.load(),
        stt=openai.STT(model="gpt-4o-mini-transcribe", detect_language=True),
        tts=openai.TTS(voice="alloy", model="tts-1-hd"),
        llm=openai.LLM(model="gpt-4o"),
        turn_detection=MultilingualModel()
    )
    logger.info("AgentSession configured. MCP servers will be managed by individual specialist agents.")
    router = RouterAgent(chat_ctx=session._chat_ctx)
    await session.start(agent=router, room=ctx.room)
    logger.info("RouterAgent session started.")
if __name__ == "__main__":
    # Setup basic logging if running directly
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    try:
        cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))
    except SystemExit:  # Allow sys.exit() to pass through without logging as critical
        raise
    except Exception as e:
        # Last-resort handler so a crash is recorded before the process dies.
        logger.critical(f"Unhandled exception at top level: {e}", exc_info=True)
        sys.exit(1)  # Ensure exit with error code

View file

@ -0,0 +1,88 @@
import logging
import os
from dotenv import load_dotenv
from pathlib import Path
from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions, cli, mcp, ChatContext
from livekit.plugins import deepgram, openai, silero
from livekit.plugins.turn_detector.multilingual import MultilingualModel
from dotenv import load_dotenv
from livekit.agents.llm import function_tool
from livekit import api, rtc
from livekit.agents import get_job_context
from livekit.agents import RunContext
async def hangup_call():
    """Terminate the current call by deleting the LiveKit room.

    No-op when called outside of a job context.
    """
    job_ctx = get_job_context()
    if job_ctx is None:
        # Not running in a job context
        return
    request = api.DeleteRoomRequest(room=job_ctx.room.name)
    await job_ctx.api.room.delete_room(request)
# NOTE(review): load_dotenv is called twice here with different paths — the
# repo-root .env (two levels up) and then a parent-dir .env. Confirm which
# location is authoritative; the second call will not override already-set vars.
load_dotenv(os.path.join(os.path.dirname(__file__), '..', '..', '.env'))
logger = logging.getLogger("mcp-agent")
load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env')
class MyAgent(Agent):
    """Telephony voice agent with tools to hang up the call.

    NOTE: the tool docstrings below are surfaced to the LLM as tool
    descriptions — treat them as prompt material.
    """
    def __init__(self, chat_ctx: ChatContext) -> None:
        super().__init__(
            instructions=(
                "You can have phone calls. The interface is voice-based: "
                "accept spoken user queries and respond with synthesized speech."
            ),
            chat_ctx=chat_ctx
        )
    @function_tool
    async def end_call(self, ctx: RunContext):
        """Called when the user wants to end the call"""
        # let the agent finish speaking
        current_speech = ctx.session.current_speech
        if current_speech:
            await current_speech.wait_for_playout()
        await hangup_call()
    @function_tool
    async def end_call_finished_by_you(self, ctx: RunContext):
        """Called when you have accomplished your task and can end the call safely"""
        # let the agent finish speaking
        current_speech = ctx.session.current_speech
        if current_speech:
            await current_speech.wait_for_playout()
        await hangup_call()
    async def on_enter(self):
        # Open the conversation with an LLM-generated reply on join.
        self.session.generate_reply()
async def entrypoint(ctx: JobContext):
    """Worker entrypoint: connect, start the telephony agent, and greet the caller."""
    await ctx.connect()
    session = AgentSession(
        vad=silero.VAD.load(),
        stt=deepgram.STT(model="nova-3", language="multi"),
        llm=openai.LLM(model="gpt-4o-mini"),
        tts=openai.TTS(voice="ash"),
        turn_detection=MultilingualModel(),
    )
    await session.start(agent=MyAgent(chat_ctx=session._chat_ctx), room=ctx.room)
    # NOTE(review): MyAgent.on_enter already triggers generate_reply(); this
    # explicit greeting may produce two back-to-back openings — confirm intent.
    await session.generate_reply(
        instructions="Greet the user and offer your assistance."
    )
if __name__ == "__main__":
    # Run as a named worker so dispatch rules can target "my-telephony-agent".
    cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint,
                              agent_name="my-telephony-agent"))

View file

@ -0,0 +1,87 @@
import logging
import os
from dotenv import load_dotenv
from pathlib import Path
from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions, cli, mcp, ChatContext
from livekit.plugins import deepgram, openai, silero
from livekit.plugins.turn_detector.multilingual import MultilingualModel
from dotenv import load_dotenv
from livekit import api
import json
load_dotenv(os.path.join(os.path.dirname(__file__), '..', '..', '.env'))
logger = logging.getLogger("mcp-agent")
load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env')
class MyAgent(Agent):
    """Minimal telephony voice agent used for inbound and outbound calls."""
    def __init__(self, chat_ctx: ChatContext) -> None:
        super().__init__(
            instructions=(
                "You can have phone calls. The interface is voice-based: "
                "accept spoken user queries and respond with synthesized speech."
            ),
            chat_ctx=chat_ctx
        )
    async def on_enter(self):
        # Open the conversation with an LLM-generated reply on join.
        self.session.generate_reply()
async def entrypoint(ctx: JobContext):
    """Entrypoint for the telephony agent.

    When the job metadata carries a phone number, an outbound SIP call is
    placed first and the session starts once answered. Without a phone number
    the same agent serves inbound/web/mobile sessions and greets the user.
    """
    await ctx.connect()
    # If a phone number was provided, then place an outbound call
    # By having a condition like this, you can use the same agent for inbound/outbound telephony as well as web/mobile/etc.
    dial_info = json.loads(ctx.job.metadata)
    # FIX: use .get() instead of dial_info["phone_number"] so that metadata
    # without the key falls through to the inbound/web path rather than
    # raising KeyError (the `is not None` checks below assume it is optional).
    phone_number = dial_info.get("phone_number")
    # The participant's identity can be anything you want, but this example uses the phone number itself
    sip_participant_identity = phone_number
    if phone_number is not None:
        # The outbound call will be placed after this method is executed
        try:
            await ctx.api.sip.create_sip_participant(api.CreateSIPParticipantRequest(
                # This ensures the participant joins the correct room
                room_name=ctx.room.name,
                # This is the outbound trunk ID to use (i.e. which phone number the call will come from)
                # You can get this from LiveKit CLI with `lk sip outbound list`
                sip_trunk_id=os.environ.get("TWILIO_SIP_TRUNK_ID"),
                # The outbound phone number to dial and identity to use
                sip_call_to=phone_number,
                participant_identity=sip_participant_identity,
                # This will wait until the call is answered before returning
                wait_until_answered=True,
            ))
            logger.info("call picked up successfully")
        except api.TwirpError as e:
            # Use the module logger (not print) so failures reach the worker logs.
            logger.error(f"error creating SIP participant: {e.message}, "
                         f"SIP status: {e.metadata.get('sip_status_code')} "
                         f"{e.metadata.get('sip_status')}")
            ctx.shutdown()
            # FIX: stop here — previously execution fell through and started a
            # full agent session even though the job had just been shut down.
            return
    session = AgentSession(
        vad=silero.VAD.load(),
        stt=deepgram.STT(model="nova-3", language="multi"),
        llm=openai.LLM(model="gpt-4o-mini"),
        tts=openai.TTS(voice="ash"),
        turn_detection=MultilingualModel(),
    )
    await session.start(agent=MyAgent(chat_ctx=session._chat_ctx), room=ctx.room)
    # Only greet proactively for inbound/web sessions; on outbound calls the
    # callee speaks first.
    if phone_number is None:
        await session.generate_reply(
            instructions="Greet the user and offer your assistance."
        )
if __name__ == "__main__":
    # Run as a named worker so dispatch rules can target "my-telephony-agent".
    cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint,
                              agent_name="my-telephony-agent"))

View file

@ -0,0 +1 @@
# This file makes Python treat the directory as a package.

View file

@ -0,0 +1,13 @@
from pydantic_settings import BaseSettings
class Settings(BaseSettings):
    """Application settings loaded from the process environment.

    Values are read from environment variables and, when present, from the
    `.env` file in the working directory (see the inner `Config`).
    """
    # Add any specific settings required by the application here
    # For example:
    # spotify_client_id: str | None = None
    # spotify_client_secret: str | None = None
    class Config:
        # pydantic-settings (v1-style) configuration: where env values come from.
        env_file = ".env"
        env_file_encoding = "utf-8"
# Single shared settings instance for the rest of the application to import.
settings = Settings()

View file

@ -0,0 +1,74 @@
[project]
name = "engine"
version = "0.1.0"
description = "Backend server for Stone OS with Pydantic-AI agents"
authors = [
{ name = "Stone AIOS Team", email = "dev@example.com" }, # Placeholder author
]
dependencies = [
"fastapi>=0.103.1",
"uvicorn[standard]>=0.23.2", # Added [standard] for better performance
"pydantic>=2.4.2",
"pydantic-settings>=2.0.3",
"anthropic>=0.15.0",
"logfire>=0.19.0",
"httpx>=0.25.0", # Needed for tests and EmailEngine API communication
# asyncio is built-in
"python-dotenv>=1.0.0",
"typing_extensions>=4.8.0",
# Add missing dependencies for memory system
"aiofiles>=0.8.0",
"filelock>=3.0.0",
"apscheduler",
# Add scheduler for background tasks
# "apscheduler>=3.0.0,<4.0.0", # Added APScheduler V3
# MCP server dependency - assuming the Python SDK is needed if direct node execution fails
# 'mcp-sdk @ git+https://github.com/modelcontextprotocol/python-sdk.git', # Uncomment if needed
"spotipy>=2.0.0", # Added for Listen Agent Spotify integration
# Add Google API client libraries for Connect Agent direct integration
"google-api-python-client>=2.0.0",
"google-auth-oauthlib>=0.7.0",
"google-auth-httplib2", # Relaxed constraint
# --- Added LiveKit Agents ---
"livekit-agents[mcp]~=1.0",
"livekit-plugins-openai", # Example, if OpenAI is used for LLM/TTS/STT
"livekit-plugins-deepgram", # Example, if Deepgram is used for STT
"livekit-plugins-silero", # For VAD
"livekit-plugins-cartesia", # Example, for TTS
"livekit-plugins-turn-detector", # For turn detection
# ----------------------------
]
requires-python = ">=3.13" # Updated minimum Python version for mcp-gsuite
readme = "README.md" # Assuming a README exists or will be created
license = { text = "MIT" } # Or choose appropriate license
# Add optional dependencies for development/testing
[project.optional-dependencies]
dev = [
"pytest>=7.0",
"pytest-asyncio>=0.20",
"respx>=0.20",
# Add other dev tools like linters (ruff, mypy) or formatters (black) here if desired
]
[project.urls]
Homepage = "https://github.com/stone-ui/stone-aios" # Placeholder URL
Repository = "https://github.com/stone-ui/stone-aios" # Placeholder URL
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.uv]
# Optional: Specify settings for uv if needed
[tool.hatch.metadata]
allow-direct-references = true
[tool.hatch.build.targets.wheel]
# Explicitly include directories containing the package code relative to pyproject.toml
# Assuming these contain the core library code. Adjust if needed.
packages = ["engine/agents", "engine/api", "engine/config", "engine/tools"] # Adjusted paths
# If main.py and __init__.py should also be part of the package, might need adjustment
# e.g., include '.' or specific file paths if they are not just scripts.

View file

@ -0,0 +1,401 @@
# This file was autogenerated by uv via the following command:
# uv pip compile pyproject.toml -o requirements.txt
aiofiles==24.1.0
# via
# engine (pyproject.toml)
# livekit
aiohappyeyeballs==2.6.1
# via aiohttp
aiohttp==3.11.18
    # via
    #   livekit-agents
    #   livekit-api
aiosignal==1.3.2
# via aiohttp
annotated-types==0.7.0
# via pydantic
anthropic==0.51.0
# via engine (pyproject.toml)
anyio==4.9.0
# via
# anthropic
# httpx
# mcp
# openai
# sse-starlette
# starlette
# watchfiles
apscheduler==3.11.0
# via engine (pyproject.toml)
attrs==25.3.0
# via aiohttp
av==14.3.0
# via livekit-agents
cachetools==5.5.2
# via google-auth
certifi==2025.4.26
# via
# httpcore
# httpx
# requests
cffi==1.17.1
# via sounddevice
charset-normalizer==3.4.2
# via requests
click==8.2.0
# via
# livekit-agents
# uvicorn
colorama==0.4.6
# via livekit-agents
coloredlogs==15.0.1
# via onnxruntime
deprecated==1.2.18
# via
# opentelemetry-api
# opentelemetry-exporter-otlp-proto-http
# opentelemetry-semantic-conventions
distro==1.9.0
# via
# anthropic
# openai
docstring-parser==0.16
# via livekit-agents
eval-type-backport==0.2.2
# via livekit-agents
executing==2.2.0
# via logfire
fastapi==0.115.12
# via engine (pyproject.toml)
filelock==3.18.0
# via
# engine (pyproject.toml)
# huggingface-hub
# transformers
flatbuffers==25.2.10
# via onnxruntime
frozenlist==1.6.0
# via
# aiohttp
# aiosignal
fsspec==2025.3.2
# via huggingface-hub
google-api-core==2.24.2
# via google-api-python-client
google-api-python-client==2.169.0
# via engine (pyproject.toml)
google-auth==2.40.1
# via
# google-api-core
# google-api-python-client
# google-auth-httplib2
# google-auth-oauthlib
google-auth-httplib2==0.2.0
# via
# engine (pyproject.toml)
# google-api-python-client
google-auth-oauthlib==1.2.2
# via engine (pyproject.toml)
googleapis-common-protos==1.70.0
# via
# google-api-core
# opentelemetry-exporter-otlp-proto-http
h11==0.16.0
# via
# httpcore
# uvicorn
httpcore==1.0.9
# via httpx
httplib2==0.22.0
# via
# google-api-python-client
# google-auth-httplib2
httptools==0.6.4
# via uvicorn
httpx==0.28.1
# via
# engine (pyproject.toml)
# anthropic
# mcp
# openai
httpx-sse==0.4.0
# via mcp
huggingface-hub==0.31.2
# via
# tokenizers
# transformers
humanfriendly==10.0
# via coloredlogs
idna==3.10
# via
# anyio
# httpx
# requests
# yarl
importlib-metadata==8.6.1
# via opentelemetry-api
jinja2==3.1.6
# via livekit-plugins-turn-detector
jiter==0.9.0
# via
# anthropic
# openai
livekit==1.0.7
# via livekit-agents
livekit-agents==1.0.22
# via
# engine (pyproject.toml)
# livekit-plugins-cartesia
# livekit-plugins-deepgram
# livekit-plugins-openai
# livekit-plugins-silero
# livekit-plugins-turn-detector
livekit-api==1.0.2
# via livekit-agents
livekit-plugins-cartesia==1.0.21
# via engine (pyproject.toml)
livekit-plugins-deepgram==1.0.21
# via engine (pyproject.toml)
livekit-plugins-openai==1.0.21
# via engine (pyproject.toml)
livekit-plugins-silero==1.0.21
# via engine (pyproject.toml)
livekit-plugins-turn-detector==1.0.21
# via engine (pyproject.toml)
livekit-protocol==1.0.3
# via
# livekit-agents
# livekit-api
logfire==3.16.0
# via engine (pyproject.toml)
markdown-it-py==3.0.0
# via rich
markupsafe==3.0.2
# via jinja2
mcp==1.9.0
# via livekit-agents
mdurl==0.1.2
# via markdown-it-py
mpmath==1.3.0
# via sympy
multidict==6.4.3
# via
# aiohttp
# yarl
nest-asyncio==1.6.0
# via livekit-agents
numpy==2.2.5
# via
# livekit
# livekit-agents
# livekit-plugins-deepgram
# livekit-plugins-silero
# livekit-plugins-turn-detector
# onnxruntime
# transformers
oauthlib==3.2.2
# via requests-oauthlib
onnxruntime==1.22.0
# via
# livekit-plugins-silero
# livekit-plugins-turn-detector
openai==1.78.1
# via livekit-plugins-openai
opentelemetry-api==1.33.0
# via
# opentelemetry-exporter-otlp-proto-http
# opentelemetry-instrumentation
# opentelemetry-sdk
# opentelemetry-semantic-conventions
opentelemetry-exporter-otlp-proto-common==1.33.0
# via opentelemetry-exporter-otlp-proto-http
opentelemetry-exporter-otlp-proto-http==1.33.0
# via logfire
opentelemetry-instrumentation==0.54b0
# via logfire
opentelemetry-proto==1.33.0
# via
# opentelemetry-exporter-otlp-proto-common
# opentelemetry-exporter-otlp-proto-http
opentelemetry-sdk==1.33.0
# via
# logfire
# opentelemetry-exporter-otlp-proto-http
opentelemetry-semantic-conventions==0.54b0
# via
# opentelemetry-instrumentation
# opentelemetry-sdk
packaging==25.0
# via
# huggingface-hub
# onnxruntime
# opentelemetry-instrumentation
# transformers
pillow==11.2.1
# via livekit-agents
propcache==0.3.1
# via
# aiohttp
# yarl
proto-plus==1.26.1
# via google-api-core
protobuf==5.29.4
# via
# google-api-core
# googleapis-common-protos
# livekit
# livekit-agents
# livekit-api
# livekit-protocol
# logfire
# onnxruntime
# opentelemetry-proto
# proto-plus
psutil==7.0.0
# via livekit-agents
pyasn1==0.6.1
# via
# pyasn1-modules
# rsa
pyasn1-modules==0.4.2
# via google-auth
pycparser==2.22
# via cffi
pydantic==2.11.4
# via
# engine (pyproject.toml)
# anthropic
# fastapi
# livekit-agents
# mcp
# openai
# pydantic-settings
pydantic-core==2.33.2
# via pydantic
pydantic-settings==2.9.1
# via
# engine (pyproject.toml)
# mcp
pygments==2.19.1
# via rich
pyjwt==2.10.1
# via
# livekit-agents
# livekit-api
pyparsing==3.2.3
# via httplib2
python-dotenv==1.1.0
# via
# engine (pyproject.toml)
# pydantic-settings
# uvicorn
python-multipart==0.0.20
# via mcp
pyyaml==6.0.2
# via
# huggingface-hub
# transformers
# uvicorn
redis==6.1.0
# via spotipy
regex==2024.11.6
# via transformers
requests==2.32.3
# via
# google-api-core
# huggingface-hub
# opentelemetry-exporter-otlp-proto-http
# requests-oauthlib
# spotipy
# transformers
requests-oauthlib==2.0.0
# via google-auth-oauthlib
rich==14.0.0
# via logfire
rsa==4.9.1
# via google-auth
safetensors==0.5.3
# via transformers
sniffio==1.3.1
# via
# anthropic
# anyio
# openai
sounddevice==0.5.1
# via livekit-agents
spotipy==2.25.1
# via engine (pyproject.toml)
sse-starlette==2.3.5
# via mcp
starlette==0.46.2
# via
# fastapi
# mcp
# sse-starlette
sympy==1.14.0
# via onnxruntime
tokenizers==0.21.1
# via transformers
tqdm==4.67.1
# via
# huggingface-hub
# openai
# transformers
transformers==4.51.3
# via livekit-plugins-turn-detector
types-protobuf==4.25.0.20240417
# via
# livekit
# livekit-agents
# livekit-api
# livekit-protocol
typing-extensions==4.13.2
# via
# engine (pyproject.toml)
# anthropic
# fastapi
# huggingface-hub
# livekit-agents
# logfire
# openai
# opentelemetry-sdk
# pydantic
# pydantic-core
# typing-inspection
typing-inspection==0.4.0
# via
# pydantic
# pydantic-settings
tzlocal==5.3.1
# via apscheduler
uritemplate==4.1.1
# via google-api-python-client
urllib3==2.4.0
# via
# requests
# spotipy
uvicorn==0.34.2
# via
# engine (pyproject.toml)
# mcp
uvloop==0.21.0
# via uvicorn
watchfiles==1.0.5
# via
# livekit-agents
# uvicorn
websockets==15.0.1
# via
# openai
# uvicorn
wrapt==1.17.2
# via
# deprecated
# opentelemetry-instrumentation
yarl==1.20.0
# via aiohttp
zipp==3.21.0
# via importlib-metadata

View file

@ -0,0 +1,22 @@
#!/bin/bash
# Launch the Stone router voice agent against a LiveKit server.
# Activate the Python virtual environment
source .router_env/bin/activate
# Install required dependencies if needed
# NOTE(review): this runs on every launch; consider moving it to a one-time
# setup script to avoid the startup cost.
pip install librosa numpy
# Set the LiveKit server URL and API keys (update these with your actual values)
export LIVEKIT_URL="pass"
export LIVEKIT_API_KEY="pass"
export LIVEKIT_API_SECRET="pass"
# Set OpenAI API key (replace with your actual key)
export OPENAI_API_KEY="pass"
# Set the room name to match the one in token-server.js
export LIVEKIT_ROOM="stone-router-voice-agent"
# Start the agent
# NOTE(review): relative paths assume the script is run from the repo root.
cd engine
python agents/stone_agent.py

View file

@ -0,0 +1,76 @@
# MCP Servers Directory
This directory contains all Model Context Protocol (MCP) servers used by the Stone AIOS engine.
## Directory Structure
- `perplexity/`: Perplexity API integration for web search
- `perplexity-ask/`: The MCP server for Perplexity's Ask functionality
- `spotify/`: Spotify API integration for music playback and control
- Additional MCP servers can be added in their own directories
## Important Notes
1. The code in `engine/agents/` is configured to look for MCP servers in this exact location (`engine/tools/mcp/`).
2. The MCP servers are initially defined as git submodules in `stone_aios/tools/mcp/` but are copied here during setup:
- The `start.sh` script copies the servers from their submodule location to this directory.
- It then builds the servers in this location to make them available to the engine.
3. When adding new MCP servers:
- Add them as submodules in `stone_aios/tools/mcp/`
- Update `start.sh` to copy and build them in `engine/tools/mcp/`
- Update the agent code to look for them in this location
## Usage
The MCP servers are automatically started when needed by the engine's agent code through the `run_mcp_servers()` context manager in Pydantic-AI.
# Model Context Protocol (MCP) Submodules
This directory contains various Model Context Protocol (MCP) implementations that Stone AIOS uses to interact with different services.
## Submodules
### Perplexity MCP
- Repository: https://github.com/ppl-ai/modelcontextprotocol.git
- Purpose: Provides integration with Perplexity's search functionality
### Spotify MCP
- Repository: https://github.com/varunneal/spotify-mcp.git
- Purpose: Enables interaction with Spotify's music service
### Basic Memory MCP
- Repository: https://github.com/basicmachines-co/basic-memory.git
- Purpose: Provides memory capabilities for agents
### Google Maps MCP
- Repository: (Google Maps implementation)
- Purpose: Enables interaction with Google Maps for location-based services
### Google Calendar MCP
- Repository: https://github.com/nspady/google-calendar-mcp.git
- Purpose: Provides integration with Google Calendar for managing events and schedules
### Calculator MCP Server
- Repository: https://github.com/githejie/mcp-server-calculator.git
- Purpose: Offers calculation capabilities through the MCP protocol
## Usage
These submodules are reference implementations that can be used by Stone AIOS tools. To update all submodules, run:
```bash
git submodule update --init --recursive
```
## Adding New MCP Implementations
To add a new MCP implementation:
1. Add it as a git submodule:
```
git submodule add <repository-url> tools/mcp/<service-name>
```
2. Update this README.md file to include information about the new submodule

View file

@ -0,0 +1,36 @@
from mcp.server.fastmcp import FastMCP
import logging
import os
# Setup logging to a file next to this script (plus the console), so tool
# invocations can be inspected after a run.
log_file_path = os.path.join(os.path.dirname(__file__), 'math_server_official.log')
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
    handlers=[
        logging.FileHandler(log_file_path, mode='w'), # 'w' to overwrite each run
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)
# FastMCP server instance; the tools below register themselves on it.
mcp = FastMCP("Official Math Server 🚀")
@mcp.tool()
def add(a: int, b: int) -> int:
    """Add two numbers and return the result"""
    # Log every invocation; the docstring above doubles as the MCP tool
    # description, so it is kept unchanged.
    logger.info(f"Executing add tool with a={a}, b={b}")
    return a + b
@mcp.tool()
def multiply(a: int, b: int) -> int:
    """Multiply two numbers and return the result"""
    # Log every invocation; the docstring above doubles as the MCP tool
    # description, so it is kept unchanged.
    logger.info(f"Executing multiply tool with a={a}, b={b}")
    return a * b
if __name__ == "__main__":
    logger.info(f"Starting Official MCP math_server.py with STDIO transport... Log file: {log_file_path}")
    # Serve over stdio so an MCP client can spawn this script as a subprocess.
    mcp.run(transport="stdio") # Ensure stdio transport is used as in server_stdio.py

View file

@ -0,0 +1,25 @@
# Two-stage build: compile TypeScript in `builder`, ship only dist/ in `release`.
FROM node:22.12-alpine AS builder
# Must be entire project because `prepare` script is run during `npm install` and requires all files.
COPY src/google-maps /app
COPY tsconfig.json /tsconfig.json
WORKDIR /app
# Full install triggers the `prepare` script, which builds dist/.
RUN --mount=type=cache,target=/root/.npm npm install
# NOTE(review): the release stage runs its own `npm ci` below — confirm this
# builder-stage prune is still needed; also confirm `--omit-dev` (vs `--omit=dev`)
# is the intended npm flag spelling for this npm version.
RUN --mount=type=cache,target=/root/.npm-production npm ci --ignore-scripts --omit-dev
FROM node:22-alpine AS release
# Carry only the compiled output and the package manifests forward.
COPY --from=builder /app/dist /app/dist
COPY --from=builder /app/package.json /app/package.json
COPY --from=builder /app/package-lock.json /app/package-lock.json
ENV NODE_ENV=production
WORKDIR /app
# Install production dependencies only, from the locked manifest.
RUN npm ci --ignore-scripts --omit-dev
ENTRYPOINT ["node", "dist/index.js"]

View file

@ -0,0 +1,54 @@
# Google Maps Integration for Stone AIOS
This module enables Stone AIOS to provide location information, directions, and other map-related services using Google Maps.
## Features
- **Location Search**: Find detailed information about places
- **Directions**: Get directions between locations with different transport modes
- **Distance Calculation**: Calculate distances and travel times
- **Place Details**: Get information about businesses, landmarks, etc.
## Requirements
- Google Cloud account with Maps API enabled
- Google Maps API key with the following APIs enabled:
- Maps JavaScript API
- Places API
- Directions API
- Distance Matrix API
- Geocoding API
## Configuration
1. Set up a Google Cloud project and enable the necessary Google Maps APIs
2. Create an API key and restrict it to the Google Maps APIs
3. Configure your `.env` file with:
```
GOOGLE_MAPS_API_KEY="your_google_maps_api_key"
```
## Integration Details
The Google Maps integration uses an MCP server implemented in JavaScript that runs as a subprocess when needed. This ensures the maps service only consumes resources when actively being used.
### Supported Commands
- "Where is the Eiffel Tower?" - Get location information
- "How do I get from New York to Boston?" - Get directions
- "How far is it from Los Angeles to San Francisco?" - Calculate distances
- "What restaurants are near me?" - Find nearby places (requires user location)
## Implementation Notes
The integration is implemented in `agents/stone_agent.py` within the `delegate_to_go_agent` function, which handles:
1. Verifying the presence of a valid Google Maps API key
2. Starting the Maps MCP server as a subprocess
3. Processing the query through Claude with map tools access
4. Returning structured results with location information
## Testing
Tests for the Google Maps integration are available in `tests/ai/test_maps_integration.py`.

View file

@ -0,0 +1,114 @@
# Google Maps MCP Server
MCP Server for the Google Maps API.
## Tools
1. `maps_geocode`
- Convert address to coordinates
- Input: `address` (string)
- Returns: location, formatted_address, place_id
2. `maps_reverse_geocode`
- Convert coordinates to address
- Inputs:
- `latitude` (number)
- `longitude` (number)
- Returns: formatted_address, place_id, address_components
3. `maps_search_places`
- Search for places using text query
- Inputs:
- `query` (string)
- `location` (optional): { latitude: number, longitude: number }
- `radius` (optional): number (meters, max 50000)
- Returns: array of places with names, addresses, locations
4. `maps_place_details`
- Get detailed information about a place
- Input: `place_id` (string)
- Returns: name, address, contact info, ratings, reviews, opening hours
5. `maps_distance_matrix`
- Calculate distances and times between points
- Inputs:
- `origins` (string[])
- `destinations` (string[])
- `mode` (optional): "driving" | "walking" | "bicycling" | "transit"
- Returns: distances and durations matrix
6. `maps_elevation`
- Get elevation data for locations
- Input: `locations` (array of {latitude, longitude})
- Returns: elevation data for each point
7. `maps_directions`
- Get directions between points
- Inputs:
- `origin` (string)
- `destination` (string)
- `mode` (optional): "driving" | "walking" | "bicycling" | "transit"
- Returns: route details with steps, distance, duration
## Setup
### API Key
Get a Google Maps API key by following the instructions [here](https://developers.google.com/maps/documentation/javascript/get-api-key#create-api-keys).
### Usage with Claude Desktop
Add the following to your `claude_desktop_config.json`:
#### Docker
```json
{
"mcpServers": {
"google-maps": {
"command": "docker",
"args": [
"run",
"-i",
"--rm",
"-e",
"GOOGLE_MAPS_API_KEY",
"mcp/google-maps"
],
"env": {
"GOOGLE_MAPS_API_KEY": "<YOUR_API_KEY>"
}
}
}
}
```
### NPX
```json
{
"mcpServers": {
"google-maps": {
"command": "npx",
"args": [
"-y",
"@modelcontextprotocol/server-google-maps"
],
"env": {
"GOOGLE_MAPS_API_KEY": "<YOUR_API_KEY>"
}
}
}
}
```
## Build
Docker build:
```bash
docker build -t mcp/google-maps -f src/google-maps/Dockerfile .
```
## License
This MCP server is licensed under the MIT License. This means you are free to use, modify, and distribute the software, subject to the terms and conditions of the MIT License. For more details, please see the LICENSE file in the project repository.

View file

@ -0,0 +1,678 @@
#!/usr/bin/env node
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import {
CallToolRequestSchema,
ListToolsRequestSchema,
Tool,
} from "@modelcontextprotocol/sdk/types.js";
import fetch from "node-fetch";
// Response interfaces
/** Fields common to every Google Maps web-service JSON response. */
interface GoogleMapsResponse {
  status: string;
  error_message?: string;
}
/** Geocoding API response (also used for reverse geocoding). */
interface GeocodeResponse extends GoogleMapsResponse {
  results: Array<{
    place_id: string;
    formatted_address: string;
    geometry: {
      location: {
        lat: number;
        lng: number;
      }
    };
    address_components: Array<{
      long_name: string;
      short_name: string;
      types: string[];
    }>;
  }>;
}
/** Places Text Search API response. */
interface PlacesSearchResponse extends GoogleMapsResponse {
  results: Array<{
    name: string;
    place_id: string;
    formatted_address: string;
    geometry: {
      location: {
        lat: number;
        lng: number;
      }
    };
    rating?: number;
    types: string[];
  }>;
}
/** Place Details API response for a single place_id. */
interface PlaceDetailsResponse extends GoogleMapsResponse {
  result: {
    name: string;
    place_id: string;
    formatted_address: string;
    formatted_phone_number?: string;
    website?: string;
    rating?: number;
    reviews?: Array<{
      author_name: string;
      rating: number;
      text: string;
      time: number;
    }>;
    opening_hours?: {
      weekday_text: string[];
      open_now: boolean;
    };
    geometry: {
      location: {
        lat: number;
        lng: number;
      }
    };
  };
}
/** Distance Matrix API response: rows are origins, elements are destinations. */
interface DistanceMatrixResponse extends GoogleMapsResponse {
  origin_addresses: string[];
  destination_addresses: string[];
  rows: Array<{
    elements: Array<{
      status: string;
      duration: {
        text: string;
        value: number;
      };
      distance: {
        text: string;
        value: number;
      };
    }>;
  }>;
}
/** Elevation API response, one result per requested location. */
interface ElevationResponse extends GoogleMapsResponse {
  results: Array<{
    elevation: number;
    location: {
      lat: number;
      lng: number;
    };
    resolution: number;
  }>;
}
/** Directions API response; each route has legs, each leg has steps. */
interface DirectionsResponse extends GoogleMapsResponse {
  routes: Array<{
    summary: string;
    legs: Array<{
      distance: {
        text: string;
        value: number;
      };
      duration: {
        text: string;
        value: number;
      };
      steps: Array<{
        html_instructions: string;
        distance: {
          text: string;
          value: number;
        };
        duration: {
          text: string;
          value: number;
        };
        travel_mode: string;
      }>;
    }>;
  }>;
}
/**
 * Read the Google Maps API key from the environment. The server cannot do
 * anything useful without it, so a missing key aborts the process.
 */
function getApiKey(): string {
  const key = process.env.GOOGLE_MAPS_API_KEY;
  if (key) {
    return key;
  }
  console.error("GOOGLE_MAPS_API_KEY environment variable is not set");
  process.exit(1);
}
const GOOGLE_MAPS_API_KEY = getApiKey();
// Tool definitions
/** maps_geocode: address string -> coordinates / formatted address / place_id. */
const GEOCODE_TOOL: Tool = {
  name: "maps_geocode",
  description: "Convert an address into geographic coordinates",
  inputSchema: {
    type: "object",
    properties: {
      address: {
        type: "string",
        description: "The address to geocode"
      }
    },
    required: ["address"]
  }
};
/** maps_reverse_geocode: lat/lng pair -> nearest address. */
const REVERSE_GEOCODE_TOOL: Tool = {
  name: "maps_reverse_geocode",
  description: "Convert coordinates into an address",
  inputSchema: {
    type: "object",
    properties: {
      latitude: {
        type: "number",
        description: "Latitude coordinate"
      },
      longitude: {
        type: "number",
        description: "Longitude coordinate"
      }
    },
    required: ["latitude", "longitude"]
  }
};
/** maps_search_places: free-text query with optional location bias + radius. */
const SEARCH_PLACES_TOOL: Tool = {
  name: "maps_search_places",
  description: "Search for places using Google Places API",
  inputSchema: {
    type: "object",
    properties: {
      query: {
        type: "string",
        description: "Search query"
      },
      location: {
        type: "object",
        properties: {
          latitude: { type: "number" },
          longitude: { type: "number" }
        },
        description: "Optional center point for the search"
      },
      radius: {
        type: "number",
        description: "Search radius in meters (max 50000)"
      }
    },
    required: ["query"]
  }
};
/** maps_place_details: place_id -> full place record. */
const PLACE_DETAILS_TOOL: Tool = {
  name: "maps_place_details",
  description: "Get detailed information about a specific place",
  inputSchema: {
    type: "object",
    properties: {
      place_id: {
        type: "string",
        description: "The place ID to get details for"
      }
    },
    required: ["place_id"]
  }
};
/** maps_distance_matrix: origins x destinations -> travel distance/time matrix. */
const DISTANCE_MATRIX_TOOL: Tool = {
  name: "maps_distance_matrix",
  description: "Calculate travel distance and time for multiple origins and destinations",
  inputSchema: {
    type: "object",
    properties: {
      origins: {
        type: "array",
        items: { type: "string" },
        description: "Array of origin addresses or coordinates"
      },
      destinations: {
        type: "array",
        items: { type: "string" },
        description: "Array of destination addresses or coordinates"
      },
      mode: {
        type: "string",
        description: "Travel mode (driving, walking, bicycling, transit)",
        enum: ["driving", "walking", "bicycling", "transit"]
      }
    },
    required: ["origins", "destinations"]
  }
};
/** maps_elevation: list of lat/lng points -> elevation per point. */
const ELEVATION_TOOL: Tool = {
  name: "maps_elevation",
  description: "Get elevation data for locations on the earth",
  inputSchema: {
    type: "object",
    properties: {
      locations: {
        type: "array",
        items: {
          type: "object",
          properties: {
            latitude: { type: "number" },
            longitude: { type: "number" }
          },
          required: ["latitude", "longitude"]
        },
        description: "Array of locations to get elevation for"
      }
    },
    required: ["locations"]
  }
};
/** maps_directions: origin + destination (+ mode) -> turn-by-turn route. */
const DIRECTIONS_TOOL: Tool = {
  name: "maps_directions",
  description: "Get directions between two points",
  inputSchema: {
    type: "object",
    properties: {
      origin: {
        type: "string",
        description: "Starting point address or coordinates"
      },
      destination: {
        type: "string",
        description: "Ending point address or coordinates"
      },
      mode: {
        type: "string",
        description: "Travel mode (driving, walking, bicycling, transit)",
        enum: ["driving", "walking", "bicycling", "transit"]
      }
    },
    required: ["origin", "destination"]
  }
};
// Complete tool list advertised by the ListTools handler below.
const MAPS_TOOLS = [
  GEOCODE_TOOL,
  REVERSE_GEOCODE_TOOL,
  SEARCH_PLACES_TOOL,
  PLACE_DETAILS_TOOL,
  DISTANCE_MATRIX_TOOL,
  ELEVATION_TOOL,
  DIRECTIONS_TOOL,
] as const;
// API handlers
/**
 * Forward-geocode a street address via the Google Geocoding API and return
 * the first match's coordinates, formatted address and place ID as an MCP
 * text-content result. Non-OK API statuses are reported as tool errors.
 */
async function handleGeocode(address: string) {
  const url = new URL("https://maps.googleapis.com/maps/api/geocode/json");
  url.searchParams.append("address", address);
  url.searchParams.append("key", GOOGLE_MAPS_API_KEY);

  const response = await fetch(url.toString());
  const data = await response.json() as GeocodeResponse;

  if (data.status !== "OK") {
    return {
      content: [{
        type: "text",
        text: `Geocoding failed: ${data.error_message || data.status}`
      }],
      isError: true
    };
  }

  // Only the best (first) match is surfaced to the caller.
  const [best] = data.results;
  const payload = {
    location: best.geometry.location,
    formatted_address: best.formatted_address,
    place_id: best.place_id
  };
  return {
    content: [{ type: "text", text: JSON.stringify(payload, null, 2) }],
    isError: false
  };
}
/**
 * Reverse-geocode a lat/lng pair via the Google Geocoding API.
 * Returns only the first result's address, place_id and components;
 * non-OK API statuses become tool errors.
 */
async function handleReverseGeocode(latitude: number, longitude: number) {
  const url = new URL("https://maps.googleapis.com/maps/api/geocode/json");
  url.searchParams.append("latlng", `${latitude},${longitude}`);
  url.searchParams.append("key", GOOGLE_MAPS_API_KEY);
  const response = await fetch(url.toString());
  const data = await response.json() as GeocodeResponse;
  if (data.status !== "OK") {
    return {
      content: [{
        type: "text",
        text: `Reverse geocoding failed: ${data.error_message || data.status}`
      }],
      isError: true
    };
  }
  return {
    content: [{
      type: "text",
      text: JSON.stringify({
        formatted_address: data.results[0].formatted_address,
        place_id: data.results[0].place_id,
        address_components: data.results[0].address_components
      }, null, 2)
    }],
    isError: false
  };
}
/**
 * Text-search for places via the Places Text Search API.
 * `location` and `radius` are optional bias parameters; all matching places
 * are returned (name, address, location, place_id, rating, types).
 */
async function handlePlaceSearch(
  query: string,
  location?: { latitude: number; longitude: number },
  radius?: number
) {
  const url = new URL("https://maps.googleapis.com/maps/api/place/textsearch/json");
  url.searchParams.append("query", query);
  url.searchParams.append("key", GOOGLE_MAPS_API_KEY);
  // Optional parameters are only appended when supplied.
  if (location) {
    url.searchParams.append("location", `${location.latitude},${location.longitude}`);
  }
  if (radius) {
    url.searchParams.append("radius", radius.toString());
  }
  const response = await fetch(url.toString());
  const data = await response.json() as PlacesSearchResponse;
  if (data.status !== "OK") {
    return {
      content: [{
        type: "text",
        text: `Place search failed: ${data.error_message || data.status}`
      }],
      isError: true
    };
  }
  return {
    content: [{
      type: "text",
      text: JSON.stringify({
        places: data.results.map((place) => ({
          name: place.name,
          formatted_address: place.formatted_address,
          location: place.geometry.location,
          place_id: place.place_id,
          rating: place.rating,
          types: place.types
        }))
      }, null, 2)
    }],
    isError: false
  };
}
/**
 * Fetch the full record for one place via the Place Details API:
 * name, address, contact info, rating, reviews and opening hours.
 */
async function handlePlaceDetails(place_id: string) {
  const url = new URL("https://maps.googleapis.com/maps/api/place/details/json");
  url.searchParams.append("place_id", place_id);
  url.searchParams.append("key", GOOGLE_MAPS_API_KEY);
  const response = await fetch(url.toString());
  const data = await response.json() as PlaceDetailsResponse;
  if (data.status !== "OK") {
    return {
      content: [{
        type: "text",
        text: `Place details request failed: ${data.error_message || data.status}`
      }],
      isError: true
    };
  }
  return {
    content: [{
      type: "text",
      text: JSON.stringify({
        name: data.result.name,
        formatted_address: data.result.formatted_address,
        location: data.result.geometry.location,
        formatted_phone_number: data.result.formatted_phone_number,
        website: data.result.website,
        rating: data.result.rating,
        reviews: data.result.reviews,
        opening_hours: data.result.opening_hours
      }, null, 2)
    }],
    isError: false
  };
}
/**
 * Compute the travel distance/duration matrix between every origin and every
 * destination via the Distance Matrix API. Origins/destinations are joined
 * with "|" as the API requires; `mode` defaults to driving.
 * NOTE(review): only the top-level status is checked — per-element statuses
 * (e.g. NOT_FOUND) are passed through to the caller unchecked.
 */
async function handleDistanceMatrix(
  origins: string[],
  destinations: string[],
  mode: "driving" | "walking" | "bicycling" | "transit" = "driving"
) {
  const url = new URL("https://maps.googleapis.com/maps/api/distancematrix/json");
  url.searchParams.append("origins", origins.join("|"));
  url.searchParams.append("destinations", destinations.join("|"));
  url.searchParams.append("mode", mode);
  url.searchParams.append("key", GOOGLE_MAPS_API_KEY);
  const response = await fetch(url.toString());
  const data = await response.json() as DistanceMatrixResponse;
  if (data.status !== "OK") {
    return {
      content: [{
        type: "text",
        text: `Distance matrix request failed: ${data.error_message || data.status}`
      }],
      isError: true
    };
  }
  return {
    content: [{
      type: "text",
      text: JSON.stringify({
        origin_addresses: data.origin_addresses,
        destination_addresses: data.destination_addresses,
        results: data.rows.map((row) => ({
          elements: row.elements.map((element) => ({
            status: element.status,
            duration: element.duration,
            distance: element.distance
          }))
        }))
      }, null, 2)
    }],
    isError: false
  };
}
/**
 * Look up elevation for each lat/lng point via the Elevation API.
 * Points are encoded as "lat,lng" joined with "|" per the API format.
 */
async function handleElevation(locations: Array<{ latitude: number; longitude: number }>) {
  const url = new URL("https://maps.googleapis.com/maps/api/elevation/json");
  const locationString = locations
    .map((loc) => `${loc.latitude},${loc.longitude}`)
    .join("|");
  url.searchParams.append("locations", locationString);
  url.searchParams.append("key", GOOGLE_MAPS_API_KEY);
  const response = await fetch(url.toString());
  const data = await response.json() as ElevationResponse;
  if (data.status !== "OK") {
    return {
      content: [{
        type: "text",
        text: `Elevation request failed: ${data.error_message || data.status}`
      }],
      isError: true
    };
  }
  return {
    content: [{
      type: "text",
      text: JSON.stringify({
        results: data.results.map((result) => ({
          elevation: result.elevation,
          location: result.location,
          resolution: result.resolution
        }))
      }, null, 2)
    }],
    isError: false
  };
}
/**
 * Get routes between two points via the Directions API; `mode` defaults to
 * driving. For each route only the first leg's distance/duration/steps are
 * summarized — a single origin/destination request yields one leg per route.
 */
async function handleDirections(
  origin: string,
  destination: string,
  mode: "driving" | "walking" | "bicycling" | "transit" = "driving"
) {
  const url = new URL("https://maps.googleapis.com/maps/api/directions/json");
  url.searchParams.append("origin", origin);
  url.searchParams.append("destination", destination);
  url.searchParams.append("mode", mode);
  url.searchParams.append("key", GOOGLE_MAPS_API_KEY);
  const response = await fetch(url.toString());
  const data = await response.json() as DirectionsResponse;
  if (data.status !== "OK") {
    return {
      content: [{
        type: "text",
        text: `Directions request failed: ${data.error_message || data.status}`
      }],
      isError: true
    };
  }
  return {
    content: [{
      type: "text",
      text: JSON.stringify({
        routes: data.routes.map((route) => ({
          summary: route.summary,
          distance: route.legs[0].distance,
          duration: route.legs[0].duration,
          steps: route.legs[0].steps.map((step) => ({
            instructions: step.html_instructions,
            distance: step.distance,
            duration: step.duration,
            travel_mode: step.travel_mode
          }))
        }))
      }, null, 2)
    }],
    isError: false
  };
}
// Server setup
// MCP server instance: advertises only the `tools` capability.
const server = new Server(
  {
    name: "mcp-server/google-maps",
    version: "0.1.0",
  },
  {
    capabilities: {
      tools: {},
    },
  },
);
// ListTools simply returns the static tool catalog defined above.
server.setRequestHandler(ListToolsRequestSchema, async () => ({
  tools: MAPS_TOOLS,
}));
// CallTool dispatcher: routes each tool name to its handler, casting the
// untyped `arguments` payload to the shape declared in that tool's schema.
// Any thrown error is converted into an isError tool result rather than a
// protocol failure.
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  try {
    switch (request.params.name) {
      case "maps_geocode": {
        const { address } = request.params.arguments as { address: string };
        return await handleGeocode(address);
      }
      case "maps_reverse_geocode": {
        const { latitude, longitude } = request.params.arguments as {
          latitude: number;
          longitude: number;
        };
        return await handleReverseGeocode(latitude, longitude);
      }
      case "maps_search_places": {
        const { query, location, radius } = request.params.arguments as {
          query: string;
          location?: { latitude: number; longitude: number };
          radius?: number;
        };
        return await handlePlaceSearch(query, location, radius);
      }
      case "maps_place_details": {
        const { place_id } = request.params.arguments as { place_id: string };
        return await handlePlaceDetails(place_id);
      }
      case "maps_distance_matrix": {
        const { origins, destinations, mode } = request.params.arguments as {
          origins: string[];
          destinations: string[];
          mode?: "driving" | "walking" | "bicycling" | "transit";
        };
        return await handleDistanceMatrix(origins, destinations, mode);
      }
      case "maps_elevation": {
        const { locations } = request.params.arguments as {
          locations: Array<{ latitude: number; longitude: number }>;
        };
        return await handleElevation(locations);
      }
      case "maps_directions": {
        const { origin, destination, mode } = request.params.arguments as {
          origin: string;
          destination: string;
          mode?: "driving" | "walking" | "bicycling" | "transit";
        };
        return await handleDirections(origin, destination, mode);
      }
      default:
        // Unknown tool names are reported back as tool errors.
        return {
          content: [{
            type: "text",
            text: `Unknown tool: ${request.params.name}`
          }],
          isError: true
        };
    }
  } catch (error) {
    return {
      content: [{
        type: "text",
        text: `Error: ${error instanceof Error ? error.message : String(error)}`
      }],
      isError: true
    };
  }
});
async function runServer() {
const transport = new StdioServerTransport();
await server.connect(transport);
console.error("Google Maps MCP Server running on stdio");
}
runServer().catch((error) => {
console.error("Fatal error running server:", error);
process.exit(1);
});

View file

@ -0,0 +1,21 @@
# Build stage: install dependencies. NOTE(review): no explicit build step is
# run, so /app/dist is presumably produced by a package lifecycle script
# (e.g. "prepare") during `npm install` — confirm against package.json.
FROM node:22.12-alpine AS builder

COPY . /app
WORKDIR /app

RUN --mount=type=cache,target=/root/.npm npm install

# Release stage: ship only the built output and production dependencies.
FROM node:22-alpine AS release

WORKDIR /app

COPY --from=builder /app/dist /app/dist
COPY --from=builder /app/package.json /app/package.json
COPY --from=builder /app/package-lock.json /app/package-lock.json

ENV NODE_ENV=production

# BUG FIX: the flag is `--omit=dev`; the misspelled `--omit-dev` is treated
# as an unknown config key, so devDependencies were installed into the image.
RUN npm ci --ignore-scripts --omit=dev

ENTRYPOINT ["node", "dist/index.js"]

Binary file not shown.

After

Width:  |  Height:  |  Size: 100 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 97 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 106 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 185 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 125 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 235 KiB

View file

@ -0,0 +1,310 @@
#!/usr/bin/env node
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import {
CallToolRequestSchema,
ListToolsRequestSchema,
Tool,
} from "@modelcontextprotocol/sdk/types.js";
/**
 * Definition of the Perplexity Ask Tool.
 * This tool accepts an array of messages and returns a chat completion response
 * from the Perplexity API, with citations appended to the message if provided.
 */
const PERPLEXITY_ASK_TOOL: Tool = {
  name: "perplexity_ask",
  description:
    "Engages in a conversation using the Sonar API. " +
    "Accepts an array of messages (each with a role and content) " +
    // TYPO FIX: "a ask completion" -> "a chat completion" in the
    // client-visible tool description (matches the JSDoc above).
    "and returns a chat completion response from the Perplexity model.",
  inputSchema: {
    type: "object",
    properties: {
      messages: {
        type: "array",
        items: {
          type: "object",
          properties: {
            role: {
              type: "string",
              description: "Role of the message (e.g., system, user, assistant)",
            },
            content: {
              type: "string",
              description: "The content of the message",
            },
          },
          required: ["role", "content"],
        },
        description: "Array of conversation messages",
      },
    },
    required: ["messages"],
  },
};
/**
 * Definition of the Perplexity Research Tool.
 * Performs deep research queries; its calls are served by the
 * "sonar-deep-research" model (see the call-tool handler below).
 * Input contract: an array of { role, content } messages.
 */
const PERPLEXITY_RESEARCH_TOOL: Tool = {
  name: "perplexity_research",
  description:
    "Performs deep research using the Perplexity API. Accepts an array of messages (each with a role and content) and returns a comprehensive research response with citations.",
  inputSchema: {
    type: "object",
    properties: {
      messages: {
        type: "array",
        items: {
          type: "object",
          properties: {
            role: { type: "string", description: "Role of the message (e.g., system, user, assistant)" },
            content: { type: "string", description: "The content of the message" },
          },
          required: ["role", "content"],
        },
        description: "Array of conversation messages",
      },
    },
    required: ["messages"],
  },
};
/**
 * Definition of the Perplexity Reason Tool.
 * This tool performs reasoning queries using the Perplexity API.
 * It shares the messages-array input contract with the other Perplexity
 * tools; its calls are served by the "sonar-reasoning-pro" model.
 */
const PERPLEXITY_REASON_TOOL: Tool = {
  name: "perplexity_reason",
  description:
    "Performs reasoning tasks using the Perplexity API. " +
    "Accepts an array of messages (each with a role and content) " +
    "and returns a well-reasoned response using the sonar-reasoning-pro model.",
  inputSchema: {
    type: "object",
    properties: {
      messages: {
        type: "array",
        items: {
          type: "object",
          properties: {
            role: {
              type: "string",
              description: "Role of the message (e.g., system, user, assistant)",
            },
            content: {
              type: "string",
              description: "The content of the message",
            },
          },
          required: ["role", "content"],
        },
        description: "Array of conversation messages",
      },
    },
    required: ["messages"],
  },
};
// Retrieve the Perplexity API key from environment variables
const PERPLEXITY_API_KEY = process.env.PERPLEXITY_API_KEY;
// Fail fast at startup: every request sends this key as a Bearer token,
// so a missing key would make all tool calls fail anyway.
if (!PERPLEXITY_API_KEY) {
  console.error("Error: PERPLEXITY_API_KEY environment variable is required");
  process.exit(1);
}
/**
 * Performs a chat completion by sending a request to the Perplexity API.
 * Appends citations to the returned message content if they exist.
 *
 * @param {Array<{ role: string; content: string }>} messages - An array of message objects.
 * @param {string} model - The model to use for the completion (defaults to "sonar-pro").
 * @returns {Promise<string>} The chat completion result with appended citations.
 * @throws Will throw an error if the network request fails, the HTTP status
 *         is non-OK, the body is not JSON, or the response shape is invalid.
 */
async function performChatCompletion(
  messages: Array<{ role: string; content: string }>,
  model: string = "sonar-pro"
): Promise<string> {
  // Construct the API endpoint URL and request body
  const url = new URL("https://api.perplexity.ai/chat/completions");
  const body = {
    model: model, // Model identifier passed as parameter
    messages: messages,
    // Additional parameters can be added here if required (e.g., max_tokens, temperature, etc.)
    // See the Sonar API documentation for more details:
    // https://docs.perplexity.ai/api-reference/chat-completions
  };
  let response;
  try {
    response = await fetch(url.toString(), {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "Authorization": `Bearer ${PERPLEXITY_API_KEY}`,
      },
      body: JSON.stringify(body),
    });
  } catch (error) {
    throw new Error(`Network error while calling Perplexity API: ${error}`);
  }
  // Check for non-successful HTTP status
  if (!response.ok) {
    let errorText;
    try {
      errorText = await response.text();
    } catch (parseError) {
      errorText = "Unable to parse error response";
    }
    throw new Error(
      `Perplexity API error: ${response.status} ${response.statusText}\n${errorText}`
    );
  }
  // Attempt to parse the JSON response from the API
  let data;
  try {
    data = await response.json();
  } catch (jsonError) {
    throw new Error(`Failed to parse JSON response from Perplexity API: ${jsonError}`);
  }
  // ROBUSTNESS FIX: validate the response shape before dereferencing it so a
  // malformed reply surfaces as a clear error instead of an opaque TypeError.
  if (!Array.isArray(data.choices) || data.choices.length === 0 || !data.choices[0].message) {
    throw new Error("Invalid response from Perplexity API: missing choices[0].message");
  }
  // Directly retrieve the main message content from the response
  let messageContent = data.choices[0].message.content;
  // If citations are provided, append them to the message content
  if (data.citations && Array.isArray(data.citations) && data.citations.length > 0) {
    messageContent += "\n\nCitations:\n";
    data.citations.forEach((citation: string, index: number) => {
      messageContent += `[${index + 1}] ${citation}\n`;
    });
  }
  return messageContent;
}
// Initialize the server with tool metadata and capabilities
// The empty `tools` capability object signals tool support; the concrete
// tool list is served by the ListTools handler below.
const server = new Server(
  {
    name: "example-servers/perplexity-ask",
    version: "0.1.0",
  },
  {
    capabilities: {
      tools: {},
    },
  }
);
/**
 * Registers a handler for listing available tools.
 * When the client requests a list of tools, this handler returns all available Perplexity tools.
 */
server.setRequestHandler(ListToolsRequestSchema, async () => ({
  tools: [PERPLEXITY_ASK_TOOL, PERPLEXITY_RESEARCH_TOOL, PERPLEXITY_REASON_TOOL],
}));
/**
 * Registers a handler for calling a specific tool.
 * Processes requests by validating input and invoking the appropriate tool.
 *
 * All three Perplexity tools share the same contract (messages in,
 * completion text out) and differ only in the backing model, so the
 * dispatch is table-driven instead of three copy-pasted switch cases.
 *
 * @param {object} request - The incoming tool call request.
 * @returns {Promise<object>} The response containing the tool's result or an error.
 */
// Maps each tool name to the Perplexity model that serves it.
const TOOL_MODELS: Record<string, string> = {
  perplexity_ask: "sonar-pro",
  perplexity_research: "sonar-deep-research",
  perplexity_reason: "sonar-reasoning-pro",
};

server.setRequestHandler(CallToolRequestSchema, async (request) => {
  try {
    const { name, arguments: args } = request.params;
    if (!args) {
      throw new Error("No arguments provided");
    }
    const model = TOOL_MODELS[name];
    if (!model) {
      // Respond with an error if an unknown tool is requested
      return {
        content: [{ type: "text", text: `Unknown tool: ${name}` }],
        isError: true,
      };
    }
    if (!Array.isArray(args.messages)) {
      // Same message text as before, parameterized by tool name.
      throw new Error(`Invalid arguments for ${name}: 'messages' must be an array`);
    }
    // Invoke the chat completion function with the provided messages,
    // routed to the model that backs the requested tool.
    const result = await performChatCompletion(args.messages, model);
    return {
      content: [{ type: "text", text: result }],
      isError: false,
    };
  } catch (error) {
    // Return error details in the response
    return {
      content: [
        {
          type: "text",
          text: `Error: ${error instanceof Error ? error.message : String(error)}`,
        },
      ],
      isError: true,
    };
  }
});
/**
* Initializes and runs the server using standard I/O for communication.
* Logs an error and exits if the server fails to start.
*/
async function runServer() {
try {
const transport = new StdioServerTransport();
await server.connect(transport);
console.error("Perplexity MCP Server running on stdio with Ask, Research, and Reason tools");
} catch (error) {
console.error("Fatal error running server:", error);
process.exit(1);
}
}
// Start the server and catch any startup errors
runServer().catch((error) => {
console.error("Fatal error running server:", error);
process.exit(1);
});

View file

@ -0,0 +1,272 @@
<div align="center" style="display: flex; align-items: center; justify-content: center; gap: 10px;">
<img src="https://upload.wikimedia.org/wikipedia/commons/8/84/Spotify_icon.svg" width="30" height="30">
<h1>Spotify MCP Server</h1>
</div>
A lightweight [Model Context Protocol (MCP)](https://modelcontextprotocol.io) server that enables AI assistants like Cursor & Claude to control Spotify playback and manage playlists.
<details>
<summary>Contents</summary>
- [Example Interactions](#example-interactions)
- [Tools](#tools)
- [Read Operations](#read-operations)
- [Play / Create Operations](#play--create-operations)
- [Setup](#setup)
- [Prerequisites](#prerequisites)
- [Installation](#installation)
- [Creating a Spotify Developer Application](#creating-a-spotify-developer-application)
- [Spotify API Configuration](#spotify-api-configuration)
- [Authentication Process](#authentication-process)
- [Integrating with Claude Desktop, Cursor, and VsCode (Cline)](#integrating-with-claude-desktop-and-cursor)
</details>
## Example Interactions
- _"Play Elvis's first song"_
- _"Create a Taylor Swift / Slipknot fusion playlist"_
- _"Copy all the techno tracks from my workout playlist to my work playlist"_
## Tools
### Read Operations
1. **searchSpotify**
- **Description**: Search for tracks, albums, artists, or playlists on Spotify
- **Parameters**:
- `query` (string): The search term
- `type` (string): Type of item to search for (track, album, artist, playlist)
- `limit` (number, optional): Maximum number of results to return (10-50)
- **Returns**: List of matching items with their IDs, names, and additional details
- **Example**: `searchSpotify("bohemian rhapsody", "track", 20)`
2. **getNowPlaying**
- **Description**: Get information about the currently playing track on Spotify
- **Parameters**: None
- **Returns**: Object containing track name, artist, album, playback progress, duration, and playback state
- **Example**: `getNowPlaying()`
3. **getMyPlaylists**
- **Description**: Get a list of the current user's playlists on Spotify
- **Parameters**:
- `limit` (number, optional): Maximum number of playlists to return (default: 20)
- `offset` (number, optional): Index of the first playlist to return (default: 0)
- **Returns**: Array of playlists with their IDs, names, track counts, and public status
- **Example**: `getMyPlaylists(10, 0)`
4. **getPlaylistTracks**
- **Description**: Get a list of tracks in a specific Spotify playlist
- **Parameters**:
- `playlistId` (string): The Spotify ID of the playlist
- `limit` (number, optional): Maximum number of tracks to return (default: 100)
- `offset` (number, optional): Index of the first track to return (default: 0)
- **Returns**: Array of tracks with their IDs, names, artists, album, duration, and added date
- **Example**: `getPlaylistTracks("37i9dQZEVXcJZyENOWUFo7")`
5. **getRecentlyPlayed**
   - **Description**: Retrieves a list of recently played tracks from Spotify.
   - **Parameters**:
     - `limit` (number, optional): A number specifying the maximum number of tracks to return.
   - **Returns**: If tracks are found it returns a formatted list of recently played tracks else a message stating: "You don't have any recently played tracks on Spotify".
   - **Example**: `getRecentlyPlayed({ limit: 10 })`
### Play / Create Operations
1. **playMusic**
- **Description**: Start playing a track, album, artist, or playlist on Spotify
- **Parameters**:
- `uri` (string, optional): Spotify URI of the item to play (overrides type and id)
- `type` (string, optional): Type of item to play (track, album, artist, playlist)
- `id` (string, optional): Spotify ID of the item to play
- `deviceId` (string, optional): ID of the device to play on
- **Returns**: Success status
- **Example**: `playMusic({ uri: "spotify:track:6rqhFgbbKwnb9MLmUQDhG6" })`
- **Alternative**: `playMusic({ type: "track", id: "6rqhFgbbKwnb9MLmUQDhG6" })`
2. **pausePlayback**
- **Description**: Pause the currently playing track on Spotify
- **Parameters**:
- `deviceId` (string, optional): ID of the device to pause
- **Returns**: Success status
- **Example**: `pausePlayback()`
3. **skipToNext**
- **Description**: Skip to the next track in the current playback queue
- **Parameters**:
- `deviceId` (string, optional): ID of the device
- **Returns**: Success status
- **Example**: `skipToNext()`
4. **skipToPrevious**
- **Description**: Skip to the previous track in the current playback queue
- **Parameters**:
- `deviceId` (string, optional): ID of the device
- **Returns**: Success status
- **Example**: `skipToPrevious()`
5. **createPlaylist**
- **Description**: Create a new playlist on Spotify
- **Parameters**:
- `name` (string): Name for the new playlist
- `description` (string, optional): Description for the playlist
- `public` (boolean, optional): Whether the playlist should be public (default: false)
- **Returns**: Object with the new playlist's ID and URL
- **Example**: `createPlaylist({ name: "Workout Mix", description: "Songs to get pumped up", public: false })`
6. **addTracksToPlaylist**
- **Description**: Add tracks to an existing Spotify playlist
- **Parameters**:
- `playlistId` (string): ID of the playlist
- `trackUris` (array): Array of track URIs or IDs to add
- `position` (number, optional): Position to insert tracks
- **Returns**: Success status and snapshot ID
- **Example**: `addTracksToPlaylist({ playlistId: "3cEYpjA9oz9GiPac4AsH4n", trackUris: ["spotify:track:4iV5W9uYEdYUVa79Axb7Rh"] })`
7. **addToQueue**
- **Description**: Adds a track, album, artist or playlist to the current playback queue
   - **Parameters**:
- `uri` (string, optional): Spotify URI of the item to add to queue (overrides type and id)
- `type` (string, optional): Type of item to queue (track, album, artist, playlist)
- `id` (string, optional): Spotify ID of the item to queue
- `deviceId` (string, optional): ID of the device to queue on
- **Returns**: Success status
- **Example**: `addToQueue({ uri: "spotify:track:6rqhFgbbKwnb9MLmUQDhG6" })`
- **Alternative**: `addToQueue({ type: "track", id: "6rqhFgbbKwnb9MLmUQDhG6" })`
## Setup
### Prerequisites
- Node.js v16+
- A Spotify Premium account
- A registered Spotify Developer application
### Installation
```bash
git clone https://github.com/marcelmarais/spotify-mcp-server.git
cd spotify-mcp-server
npm install
npm run build
```
### Creating a Spotify Developer Application
1. Go to the [Spotify Developer Dashboard](https://developer.spotify.com/dashboard/)
2. Log in with your Spotify account
3. Click the "Create an App" button
4. Fill in the app name and description
5. Accept the Terms of Service and click "Create"
6. In your new app's dashboard, you'll see your **Client ID**
7. Click "Show Client Secret" to reveal your **Client Secret**
8. Click "Edit Settings" and add a Redirect URI (e.g., `http://localhost:8888/callback`)
9. Save your changes
### Spotify API Configuration
Create a `spotify-config.json` file in the project root (you can copy and modify the provided example):
```bash
# Copy the example config file
cp spotify-config.example.json spotify-config.json
```
Then edit the file with your credentials:
```json
{
"clientId": "your-client-id",
"clientSecret": "your-client-secret",
"redirectUri": "http://localhost:8888/callback"
}
```
### Authentication Process
The Spotify API uses OAuth 2.0 for authentication. Follow these steps to authenticate your application:
1. Run the authentication script:
```bash
npm run auth
```
2. The script will generate an authorization URL. Open this URL in your web browser.
3. You'll be prompted to log in to Spotify and authorize your application.
4. After authorization, Spotify will redirect you to your specified redirect URI with a code parameter in the URL.
5. The authentication script will automatically exchange this code for access and refresh tokens.
6. These tokens will be saved to your `spotify-config.json` file, which will now look something like:
```json
{
"clientId": "your-client-id",
"clientSecret": "your-client-secret",
"redirectUri": "http://localhost:8888/callback",
"accessToken": "BQAi9Pn...kKQ",
"refreshToken": "AQDQcj...7w",
"expiresAt": 1677889354671
}
```
7. The server will automatically refresh the access token when needed, using the refresh token.
## Integrating with Claude Desktop, Cursor, and VsCode [Via Cline model extension](https://marketplace.visualstudio.com/items/?itemName=saoudrizwan.claude-dev)
To use your MCP server with Claude Desktop, add it to your Claude configuration:
```json
{
"mcpServers": {
"spotify": {
"command": "node",
"args": ["spotify-mcp-server/build/index.js"]
}
}
}
```
For Cursor, go to the MCP tab in `Cursor Settings` (command + shift + J). Add a server with this command:
```bash
node path/to/spotify-mcp-server/build/index.js
```
To set up your MCP correctly with Cline ensure you have the following file configuration set `cline_mcp_settings.json`:
```json
{
"mcpServers": {
"spotify": {
"command": "node",
"args": ["~/../spotify-mcp-server/build/index.js"],
"autoApprove": ["getListeningHistory", "getNowPlaying"]
}
}
}
```
You can add additional tools to the auto approval array to run the tools without intervention.

View file

@ -0,0 +1,134 @@
{
"$schema": "https://biomejs.dev/schemas/1.9.4/schema.json",
"files": {
"ignoreUnknown": false,
"ignore": [
"**/pnpm-lock.yaml",
"lib/db/migrations",
"lib/editor/react-renderer.tsx",
"node_modules",
".next",
"public",
".vercel"
]
},
"vcs": {
"enabled": true,
"clientKind": "git",
"defaultBranch": "main",
"useIgnoreFile": true
},
"formatter": {
"enabled": true,
"formatWithErrors": false,
"indentStyle": "space",
"indentWidth": 2,
"lineEnding": "lf",
"lineWidth": 80,
"attributePosition": "auto"
},
"linter": {
"enabled": true,
"rules": {
"recommended": true,
"a11y": {
"useHtmlLang": "warn", // Not in recommended ruleset, turning on manually
"noHeaderScope": "warn", // Not in recommended ruleset, turning on manually
"useValidAriaRole": {
"level": "warn",
"options": {
"ignoreNonDom": false,
"allowInvalidRoles": ["none", "text"]
}
},
"useSemanticElements": "off", // Rule is buggy, revisit later
"noSvgWithoutTitle": "off", // We do not intend to adhere to this rule
"useMediaCaption": "off", // We would need a cultural change to turn this on
"noAutofocus": "off", // We're highly intentional about when we use autofocus
"noBlankTarget": "off", // Covered by Conformance
"useFocusableInteractive": "off", // Disable focusable interactive element requirement
"useAriaPropsForRole": "off", // Disable required ARIA attributes check
"useKeyWithClickEvents": "off" // Disable keyboard event requirement with click events
},
"complexity": {
"noUselessStringConcat": "warn", // Not in recommended ruleset, turning on manually
"noForEach": "off", // forEach is too familiar to ban
"noUselessSwitchCase": "off", // Turned off due to developer preferences
"noUselessThisAlias": "off" // Turned off due to developer preferences
},
"correctness": {
"noUnusedImports": "warn", // Not in recommended ruleset, turning on manually
"useArrayLiterals": "warn", // Not in recommended ruleset, turning on manually
"noNewSymbol": "warn", // Not in recommended ruleset, turning on manually
"useJsxKeyInIterable": "off", // Rule is buggy, revisit later
"useExhaustiveDependencies": "off", // Community feedback on this rule has been poor, we will continue with ESLint
"noUnnecessaryContinue": "off" // Turned off due to developer preferences
},
"security": {
"noDangerouslySetInnerHtml": "off" // Covered by Conformance
},
"style": {
"useFragmentSyntax": "warn", // Not in recommended ruleset, turning on manually
"noYodaExpression": "warn", // Not in recommended ruleset, turning on manually
"useDefaultParameterLast": "warn", // Not in recommended ruleset, turning on manually
"useExponentiationOperator": "off", // Obscure and arguably not easily readable
"noUnusedTemplateLiteral": "off", // Stylistic opinion
"noUselessElse": "off" // Stylistic opinion
},
"suspicious": {
"noExplicitAny": "off" // We trust Vercelians to use any only when necessary
},
"nursery": {
"noStaticElementInteractions": "warn",
"noHeadImportInDocument": "warn",
"noDocumentImportInPage": "warn",
"noDuplicateElseIf": "warn",
"noIrregularWhitespace": "warn",
"useValidAutocomplete": "warn"
}
}
},
"javascript": {
"jsxRuntime": "reactClassic",
"formatter": {
"jsxQuoteStyle": "double",
"quoteProperties": "asNeeded",
"trailingCommas": "all",
"semicolons": "always",
"arrowParentheses": "always",
"bracketSpacing": true,
"bracketSameLine": false,
"quoteStyle": "single",
"attributePosition": "auto"
}
},
"json": {
"formatter": {
"enabled": true,
"trailingCommas": "none"
},
"parser": {
"allowComments": true,
"allowTrailingCommas": false
}
},
"css": {
"formatter": { "enabled": false },
"linter": { "enabled": false }
},
"organizeImports": { "enabled": false },
"overrides": [
// Playwright requires an object destructure, even if empty
// https://github.com/microsoft/playwright/issues/30007
{
"include": ["playwright/**"],
"linter": {
"rules": {
"correctness": {
"noEmptyPattern": "off"
}
}
}
}
]
}

View file

@ -0,0 +1,14 @@
#!/usr/bin/env node
import { authorizeSpotify } from './utils.js';

// Standalone CLI: walks the user through the Spotify OAuth flow and exits
// with status 0 on success or 1 on failure.
console.log('Starting Spotify authentication flow...');

(async () => {
  try {
    await authorizeSpotify();
    console.log('Authentication completed successfully!');
    process.exit(0);
  } catch (error) {
    console.error('Authentication failed:', error);
    process.exit(1);
  }
})();

View file

@ -0,0 +1,23 @@
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { playTools } from './play.js';
import { readTools } from './read.js';
// Aggregate every Spotify tool (read first, then playback) onto a single
// MCP server instance.
const server = new McpServer({
  name: 'spotify-controller',
  version: '1.0.0',
});

for (const spotifyTool of [...readTools, ...playTools]) {
  server.tool(
    spotifyTool.name,
    spotifyTool.description,
    spotifyTool.schema,
    spotifyTool.handler,
  );
}

// Connect the server over stdio; fatal errors are logged and terminate
// the process.
async function main() {
  const transport = new StdioServerTransport();
  await server.connect(transport);
}

main().catch((error) => {
  console.error('Fatal error in main():', error);
  process.exit(1);
});

View file

@ -0,0 +1,371 @@
import { handleSpotifyRequest } from './utils.js';
import { z } from 'zod';
import type { SpotifyHandlerExtra, tool } from './types.js';
// Starts playback of a track, album, artist, or playlist. Accepts either a
// full Spotify URI or a (type, id) pair; an optional deviceId targets a
// specific device (empty string = currently active device).
const playMusic: tool<{
  uri: z.ZodOptional<z.ZodString>;
  type: z.ZodOptional<z.ZodEnum<['track', 'album', 'artist', 'playlist']>>;
  id: z.ZodOptional<z.ZodString>;
  deviceId: z.ZodOptional<z.ZodString>;
}> = {
  name: 'playMusic',
  description: 'Start playing a Spotify track, album, artist, or playlist',
  schema: {
    uri: z
      .string()
      .optional()
      .describe('The Spotify URI to play (overrides type and id)'),
    type: z
      .enum(['track', 'album', 'artist', 'playlist'])
      .optional()
      .describe('The type of item to play'),
    id: z.string().optional().describe('The Spotify ID of the item to play'),
    deviceId: z
      .string()
      .optional()
      .describe('The Spotify device ID to play on'),
  },
  handler: async (args, extra: SpotifyHandlerExtra) => {
    const { uri, type, id, deviceId } = args;
    if (!uri && (!type || !id)) {
      return {
        content: [
          {
            type: 'text',
            text: 'Error: Must provide either a URI or both a type and ID',
          },
        ],
        // BUG FIX: isError was nested inside the content item, where MCP
        // clients never look for it; it belongs on the result object.
        isError: true,
      };
    }
    // Build the URI from (type, id) when no explicit URI was given.
    let spotifyUri = uri;
    if (!spotifyUri && type && id) {
      spotifyUri = `spotify:${type}:${id}`;
    }
    await handleSpotifyRequest(async (spotifyApi) => {
      const device = deviceId || '';
      if (!spotifyUri) {
        // NOTE(review): with the guard above, spotifyUri should always be
        // set here; kept as a defensive resume of current playback.
        await spotifyApi.player.startResumePlayback(device);
        return;
      }
      // Tracks are passed as a URI list; container types (album/artist/
      // playlist) go in the context-URI parameter instead.
      if (type === 'track') {
        await spotifyApi.player.startResumePlayback(device, undefined, [
          spotifyUri,
        ]);
      } else {
        await spotifyApi.player.startResumePlayback(device, spotifyUri);
      }
    });
    return {
      content: [
        {
          type: 'text',
          text: `Started playing ${type || 'music'} ${id ? `(ID: ${id})` : ''}`,
        },
      ],
    };
  },
};
// Pauses playback on the given device (empty string = active device).
const pausePlayback: tool<{
  deviceId: z.ZodOptional<z.ZodString>;
}> = {
  name: 'pausePlayback',
  description: 'Pause Spotify playback on the active device',
  schema: {
    deviceId: z
      .string()
      .optional()
      .describe('The Spotify device ID to pause playback on'),
  },
  handler: async ({ deviceId }, extra: SpotifyHandlerExtra) => {
    await handleSpotifyRequest(async (spotifyApi) => {
      await spotifyApi.player.pausePlayback(deviceId || '');
    });
    return { content: [{ type: 'text', text: 'Playback paused' }] };
  },
};
// Advances playback to the next queued track on the given (or active) device.
const skipToNext: tool<{
  deviceId: z.ZodOptional<z.ZodString>;
}> = {
  name: 'skipToNext',
  description: 'Skip to the next track in the current Spotify playback queue',
  schema: {
    deviceId: z
      .string()
      .optional()
      .describe('The Spotify device ID to skip on'),
  },
  handler: async ({ deviceId }, extra: SpotifyHandlerExtra) => {
    await handleSpotifyRequest(async (spotifyApi) => {
      await spotifyApi.player.skipToNext(deviceId || '');
    });
    return { content: [{ type: 'text', text: 'Skipped to next track' }] };
  },
};
// Moves playback one track back on the given (or active) device.
const skipToPrevious: tool<{
  deviceId: z.ZodOptional<z.ZodString>;
}> = {
  name: 'skipToPrevious',
  description:
    'Skip to the previous track in the current Spotify playback queue',
  schema: {
    deviceId: z
      .string()
      .optional()
      .describe('The Spotify device ID to skip on'),
  },
  handler: async ({ deviceId }, extra: SpotifyHandlerExtra) => {
    await handleSpotifyRequest(async (spotifyApi) => {
      await spotifyApi.player.skipToPrevious(deviceId || '');
    });
    return { content: [{ type: 'text', text: 'Skipped to previous track' }] };
  },
};
// Creates a playlist under the authenticated user's account. Playlists
// default to private unless `public` is explicitly true.
const createPlaylist: tool<{
  name: z.ZodString;
  description: z.ZodOptional<z.ZodString>;
  public: z.ZodOptional<z.ZodBoolean>;
}> = {
  name: 'createPlaylist',
  description: 'Create a new playlist on Spotify',
  schema: {
    name: z.string().describe('The name of the playlist'),
    description: z
      .string()
      .optional()
      .describe('The description of the playlist'),
    public: z
      .boolean()
      .optional()
      .describe('Whether the playlist should be public'),
  },
  handler: async (args, extra: SpotifyHandlerExtra) => {
    const { name, description, public: isPublic = false } = args;

    const created = await handleSpotifyRequest(async (spotifyApi) => {
      // Resolve the current user's id first; the create call requires it.
      const profile = await spotifyApi.currentUser.profile();
      return spotifyApi.playlists.createPlaylist(profile.id, {
        name,
        description,
        public: isPublic,
      });
    });

    return {
      content: [
        {
          type: 'text',
          text: `Successfully created playlist "${name}"\nPlaylist ID: ${created.id}`,
        },
      ],
    };
  },
};
// Appends tracks (by bare Spotify ID) to an existing playlist, optionally
// at a specific 0-based position.
const addTracksToPlaylist: tool<{
  playlistId: z.ZodString;
  trackIds: z.ZodArray<z.ZodString>;
  position: z.ZodOptional<z.ZodNumber>;
}> = {
  name: 'addTracksToPlaylist',
  description: 'Add tracks to a Spotify playlist',
  schema: {
    playlistId: z.string().describe('The Spotify ID of the playlist'),
    trackIds: z.array(z.string()).describe('Array of Spotify track IDs to add'),
    position: z
      .number()
      .nonnegative()
      .optional()
      .describe('Position to insert the tracks (0-based index)'),
  },
  handler: async (args, extra: SpotifyHandlerExtra) => {
    const { playlistId, trackIds, position } = args;
    if (trackIds.length === 0) {
      return {
        content: [
          {
            type: 'text',
            text: 'Error: No track IDs provided',
          },
        ],
        // CONSISTENCY FIX: flag failures with isError so MCP clients can
        // distinguish them from successful results.
        isError: true,
      };
    }
    try {
      // The Spotify API expects full track URIs, not bare IDs.
      const trackUris = trackIds.map((id) => `spotify:track:${id}`);
      await handleSpotifyRequest(async (spotifyApi) => {
        await spotifyApi.playlists.addItemsToPlaylist(
          playlistId,
          trackUris,
          position,
        );
      });
      return {
        content: [
          {
            type: 'text',
            text: `Successfully added ${trackIds.length} track${
              trackIds.length === 1 ? '' : 's'
            } to playlist (ID: ${playlistId})`,
          },
        ],
      };
    } catch (error) {
      return {
        content: [
          {
            type: 'text',
            text: `Error adding tracks to playlist: ${
              error instanceof Error ? error.message : String(error)
            }`,
          },
        ],
        // CONSISTENCY FIX: mark the failure path as an error result.
        isError: true,
      };
    }
  },
};
// Resumes (or starts) playback on the given (or currently active) device.
const resumePlayback: tool<{
  deviceId: z.ZodOptional<z.ZodString>;
}> = {
  name: 'resumePlayback',
  description: 'Resume Spotify playback on the active device',
  schema: {
    deviceId: z
      .string()
      .optional()
      .describe('The Spotify device ID to resume playback on'),
  },
  handler: async ({ deviceId }, extra: SpotifyHandlerExtra) => {
    await handleSpotifyRequest(async (spotifyApi) => {
      await spotifyApi.player.startResumePlayback(deviceId || '');
    });
    return { content: [{ type: 'text', text: 'Playback resumed' }] };
  },
};
/**
 * Tool: add a track, album, artist, or playlist to the playback queue.
 *
 * Accepts either a full Spotify URI, or a type + ID pair from which the
 * URI is constructed. The URI takes precedence when both are given.
 */
const addToQueue: tool<{
  uri: z.ZodOptional<z.ZodString>;
  type: z.ZodOptional<z.ZodEnum<['track', 'album', 'artist', 'playlist']>>;
  id: z.ZodOptional<z.ZodString>;
  deviceId: z.ZodOptional<z.ZodString>;
}> = {
  name: 'addToQueue',
  description: 'Adds a track, album, artist or playlist to the playback queue',
  schema: {
    uri: z
      .string()
      .optional()
      .describe('The Spotify URI to play (overrides type and id)'),
    type: z
      .enum(['track', 'album', 'artist', 'playlist'])
      .optional()
      .describe('The type of item to play'),
    id: z.string().optional().describe('The Spotify ID of the item to play'),
    deviceId: z
      .string()
      .optional()
      .describe('The Spotify device ID to add the track to'),
  },
  handler: async (args) => {
    const { uri, type, id, deviceId } = args;
    // Prefer an explicit URI; otherwise assemble one from type + id.
    let spotifyUri = uri;
    if (!spotifyUri && type && id) {
      spotifyUri = `spotify:${type}:${id}`;
    }
    if (!spotifyUri) {
      // FIX: `isError` belongs on the result object itself, not inside a
      // content item, per the MCP CallToolResult shape.
      return {
        isError: true,
        content: [
          {
            type: 'text',
            text: 'Error: Must provide either a URI or both a type and ID',
          },
        ],
      };
    }
    // Empty device ID means "the currently active device".
    await handleSpotifyRequest(async (spotifyApi) => {
      await spotifyApi.player.addItemToPlaybackQueue(
        spotifyUri,
        deviceId || '',
      );
    });
    return {
      content: [
        {
          type: 'text',
          text: `Added item ${spotifyUri} to queue`,
        },
      ],
    };
  },
};
// Registry of the playback/mutation tools defined in this module, consumed
// by the MCP server's tool registration.
export const playTools = [
  playMusic,
  pausePlayback,
  skipToNext,
  skipToPrevious,
  createPlaylist,
  addTracksToPlaylist,
  resumePlayback,
  addToQueue,
];

View file

@ -0,0 +1,366 @@
import type { RequestHandlerExtra } from '@modelcontextprotocol/sdk/shared/protocol.js';
import type { MaxInt } from '@spotify/web-api-ts-sdk';
import { z } from 'zod';
import type { SpotifyHandlerExtra, SpotifyTrack, tool } from './types.js';
import { formatDuration, handleSpotifyRequest } from './utils.js';
/**
 * Type guard: narrow an arbitrary player/playlist item to a full track.
 *
 * The player API can also surface non-track items (and `null` for removed
 * entries), so callers use this before reading track-only fields.
 */
function isTrack(item: any): item is SpotifyTrack {
  // FIX: coerce to a real boolean — a bare `item && ...` returns the falsy
  // operand itself (e.g. `null`) instead of `false`.
  return Boolean(
    item &&
      item.type === 'track' &&
      Array.isArray(item.artists) &&
      item.album &&
      typeof item.album.name === 'string',
  );
}
/**
 * Tool: search Spotify for tracks, albums, artists, or playlists and return
 * a numbered, human-readable result list (including item IDs so other tools
 * can act on them).
 */
const searchSpotify: tool<{
  query: z.ZodString;
  type: z.ZodEnum<['track', 'album', 'artist', 'playlist']>;
  limit: z.ZodOptional<z.ZodNumber>;
}> = {
  name: 'searchSpotify',
  description: 'Search for tracks, albums, artists, or playlists on Spotify',
  schema: {
    query: z.string().describe('The search query'),
    type: z
      .enum(['track', 'album', 'artist', 'playlist'])
      .describe(
        'The type of item to search for either track, album, artist, or playlist',
      ),
    limit: z
      .number()
      .min(1)
      .max(50)
      .optional()
      .describe('Maximum number of results to return (10-50)'),
  },
  handler: async (args, extra: SpotifyHandlerExtra) => {
    const { query, type, limit } = args;
    const limitValue = limit ?? 10;
    try {
      const results = await handleSpotifyRequest(async (spotifyApi) => {
        return await spotifyApi.search(
          query,
          [type],
          undefined,
          limitValue as MaxInt<50>,
        );
      });
      // Only the branch matching the requested type produces output; the
      // others leave formattedResults empty and fall through to "No results".
      let formattedResults = '';
      if (type === 'track' && results.tracks) {
        formattedResults = results.tracks.items
          .map((track, i) => {
            const artists = track.artists.map((a) => a.name).join(', ');
            const duration = formatDuration(track.duration_ms);
            return `${i + 1}. "${
              track.name
            }" by ${artists} (${duration}) - ID: ${track.id}`;
          })
          .join('\n');
      } else if (type === 'album' && results.albums) {
        formattedResults = results.albums.items
          .map((album, i) => {
            const artists = album.artists.map((a) => a.name).join(', ');
            return `${i + 1}. "${album.name}" by ${artists} - ID: ${album.id}`;
          })
          .join('\n');
      } else if (type === 'artist' && results.artists) {
        formattedResults = results.artists.items
          .map((artist, i) => {
            return `${i + 1}. ${artist.name} - ID: ${artist.id}`;
          })
          .join('\n');
      } else if (type === 'playlist' && results.playlists) {
        // Search results can contain null playlist slots, hence the
        // optional chaining and fallbacks.
        formattedResults = results.playlists.items
          .map((playlist, i) => {
            // FIX: the description used to be rendered inside a
            // "(... tracks)" label, producing nonsense like
            // "Name (My mellow mix tracks)". Show it as a description.
            return `${i + 1}. "${playlist?.name ?? 'Unknown Playlist'}" (${
              playlist?.description ?? 'No description'
            }) by ${playlist?.owner?.display_name} - ID: ${playlist?.id}`;
          })
          .join('\n');
      }
      return {
        content: [
          {
            type: 'text',
            text:
              formattedResults.length > 0
                ? `# Search results for "${query}" (type: ${type})\n\n${formattedResults}`
                : `No ${type} results found for "${query}"`,
          },
        ],
      };
    } catch (error) {
      return {
        content: [
          {
            type: 'text',
            text: `Error searching for ${type}s: ${
              error instanceof Error ? error.message : String(error)
            }`,
          },
        ],
      };
    }
  },
};
/**
 * Tool: report what is currently playing on the user's active device.
 *
 * Produces a markdown summary (track, artist, album, progress) or a plain
 * message when nothing is playing or the item is not a track.
 */
const getNowPlaying: tool<Record<string, never>> = {
  name: 'getNowPlaying',
  description: 'Get information about the currently playing track on Spotify',
  schema: {},
  handler: async (args, extra: SpotifyHandlerExtra) => {
    try {
      const currentTrack = await handleSpotifyRequest(async (spotifyApi) => {
        return await spotifyApi.player.getCurrentlyPlayingTrack();
      });
      // No active playback session, or nothing loaded in the player.
      if (!currentTrack || !currentTrack.item) {
        return {
          content: [
            {
              type: 'text',
              text: 'Nothing is currently playing on Spotify',
            },
          ],
        };
      }
      const item = currentTrack.item;
      // The player can also return non-track items (e.g. podcast episodes),
      // which lack the track-only fields read below.
      if (!isTrack(item)) {
        return {
          content: [
            {
              type: 'text',
              text: 'Currently playing item is not a track (might be a podcast episode)',
            },
          ],
        };
      }
      const artists = item.artists.map((a) => a.name).join(', ');
      const album = item.album.name;
      const duration = formatDuration(item.duration_ms);
      // progress_ms can be null at track boundaries; treat as 0.
      const progress = formatDuration(currentTrack.progress_ms || 0);
      const isPlaying = currentTrack.is_playing;
      return {
        content: [
          {
            type: 'text',
            text:
              `# Currently ${isPlaying ? 'Playing' : 'Paused'}\n\n` +
              `**Track**: "${item.name}"\n` +
              `**Artist**: ${artists}\n` +
              `**Album**: ${album}\n` +
              `**Progress**: ${progress} / ${duration}\n` +
              `**ID**: ${item.id}`,
          },
        ],
      };
    } catch (error) {
      return {
        content: [
          {
            type: 'text',
            text: `Error getting current track: ${
              error instanceof Error ? error.message : String(error)
            }`,
          },
        ],
      };
    }
  },
};
/**
 * Tool: list the current user's playlists as a numbered markdown list.
 */
const getMyPlaylists: tool<{
  limit: z.ZodOptional<z.ZodNumber>;
}> = {
  name: 'getMyPlaylists',
  description: "Get a list of the current user's playlists on Spotify",
  schema: {
    limit: z
      .number()
      .min(1)
      .max(50)
      .optional()
      .describe('Maximum number of playlists to return (1-50)'),
  },
  handler: async (args, extra: SpotifyHandlerExtra) => {
    // Default to the API maximum of 50 when no limit was given.
    const requested = args.limit ?? 50;
    const playlists = await handleSpotifyRequest((spotifyApi) =>
      spotifyApi.currentUser.playlists.playlists(requested as MaxInt<50>),
    );
    if (playlists.items.length === 0) {
      return {
        content: [
          { type: 'text', text: "You don't have any playlists on Spotify" },
        ],
      };
    }
    const lines: string[] = [];
    for (const [index, playlist] of playlists.items.entries()) {
      // tracks may be absent on some playlist objects; fall back to 0.
      const trackCount = playlist.tracks?.total ? playlist.tracks.total : 0;
      lines.push(
        `${index + 1}. "${playlist.name}" (${trackCount} tracks) - ID: ${playlist.id}`,
      );
    }
    return {
      content: [
        {
          type: 'text',
          text: `# Your Spotify Playlists\n\n${lines.join('\n')}`,
        },
      ],
    };
  },
};
/**
 * Tool: list the tracks in a playlist as a numbered markdown list.
 */
const getPlaylistTracks: tool<{
  playlistId: z.ZodString;
  limit: z.ZodOptional<z.ZodNumber>;
}> = {
  name: 'getPlaylistTracks',
  description: 'Get a list of tracks in a Spotify playlist',
  schema: {
    playlistId: z.string().describe('The Spotify ID of the playlist'),
    limit: z
      .number()
      .min(1)
      .max(50)
      .optional()
      .describe('Maximum number of tracks to return (1-50)'),
  },
  handler: async (args, extra: SpotifyHandlerExtra) => {
    // Default to the API maximum of 50 when no limit was given.
    const { playlistId, limit = 50 } = args;
    const playlistTracks = await handleSpotifyRequest(async (spotifyApi) => {
      return await spotifyApi.playlists.getPlaylistItems(
        playlistId,
        undefined,
        undefined,
        limit as MaxInt<50>,
      );
    });
    if ((playlistTracks.items?.length ?? 0) === 0) {
      return {
        content: [
          {
            type: 'text',
            text: "This playlist doesn't have any tracks",
          },
        ],
      };
    }
    const formattedTracks = playlistTracks.items
      .map((item, i) => {
        const { track } = item;
        // Tracks can be removed from the catalog after being added.
        if (!track) return `${i + 1}. [Removed track]`;
        if (isTrack(track)) {
          const artists = track.artists.map((a) => a.name).join(', ');
          const duration = formatDuration(track.duration_ms);
          return `${i + 1}. "${track.name}" by ${artists} (${duration}) - ID: ${track.id}`;
        }
        // Non-track items (e.g. podcast episodes) are not formatted here.
        return `${i + 1}. Unknown item`;
      })
      .join('\n');
    return {
      content: [
        {
          type: 'text',
          text: `# Tracks in Playlist\n\n${formattedTracks}`,
        },
      ],
    };
  },
};
/**
 * Tool: list the user's recently played tracks as a numbered markdown list.
 */
const getRecentlyPlayed: tool<{
  limit: z.ZodOptional<z.ZodNumber>;
}> = {
  name: 'getRecentlyPlayed',
  description: 'Get a list of recently played tracks on Spotify',
  schema: {
    limit: z
      .number()
      .min(1)
      .max(50)
      .optional()
      .describe('Maximum number of tracks to return (1-50)'),
  },
  handler: async (args, extra: SpotifyHandlerExtra) => {
    // Default to the API maximum of 50 when no limit was given.
    const { limit = 50 } = args;
    const history = await handleSpotifyRequest(async (spotifyApi) => {
      return await spotifyApi.player.getRecentlyPlayedTracks(
        limit as MaxInt<50>,
      );
    });
    if (history.items.length === 0) {
      return {
        content: [
          {
            type: 'text',
            text: "You don't have any recently played tracks on Spotify",
          },
        ],
      };
    }
    const formattedHistory = history.items
      .map((item, i) => {
        const track = item.track;
        // Tracks can be removed from the catalog after being played.
        if (!track) return `${i + 1}. [Removed track]`;
        if (isTrack(track)) {
          const artists = track.artists.map((a) => a.name).join(', ');
          const duration = formatDuration(track.duration_ms);
          return `${i + 1}. "${track.name}" by ${artists} (${duration}) - ID: ${track.id}`;
        }
        return `${i + 1}. Unknown item`;
      })
      .join('\n');
    return {
      content: [
        {
          type: 'text',
          text: `# Recently Played Tracks\n\n${formattedHistory}`,
        },
      ],
    };
  }, // FIX: restored trailing comma + terminating semicolon for consistency
}; //      with the sibling tool definitions (original relied on ASI).
// Registry of the read-only tools defined in this module, consumed by the
// MCP server's tool registration.
export const readTools = [
  searchSpotify,
  getNowPlaying,
  getMyPlaylists,
  getPlaylistTracks,
  getRecentlyPlayed,
];

View file

@ -0,0 +1,47 @@
import type { RequestHandlerExtra } from '@modelcontextprotocol/sdk/shared/protocol.js';
import { ServerNotification, ServerRequest } from '@modelcontextprotocol/sdk/types.js';
import type { z } from 'zod';
// Extra context passed by the MCP SDK to every tool handler.
export type SpotifyHandlerExtra = RequestHandlerExtra<ServerRequest, ServerNotification>;

// Shape of a tool definition: a zod raw schema plus a handler that returns
// (synchronously or as a promise) a text-only content result. The handler's
// args are inferred from the schema.
export type tool<Args extends z.ZodRawShape> = {
  name: string;
  description: string;
  schema: Args;
  handler: (
    args: z.infer<z.ZodObject<Args>>,
    extra: SpotifyHandlerExtra,
  ) =>
    | Promise<{
        content: Array<{
          type: 'text';
          text: string;
        }>;
      }>
    | {
        content: Array<{
          type: 'text';
          text: string;
        }>;
      };
};
// Minimal projections of the Spotify SDK's response objects — only the
// fields this server actually reads.
export interface SpotifyArtist {
  id: string;
  name: string;
}
export interface SpotifyAlbum {
  id: string;
  name: string;
  artists: SpotifyArtist[];
}
export interface SpotifyTrack {
  id: string;
  name: string;
  type: string; // 'track' for tracks; checked by the isTrack() guard
  duration_ms: number;
  artists: SpotifyArtist[];
  album: SpotifyAlbum;
}

View file

@ -0,0 +1,301 @@
import { SpotifyApi } from '@spotify/web-api-ts-sdk';
import crypto from 'node:crypto';
import fs from 'node:fs';
import http from 'node:http';
import path from 'node:path';
import { fileURLToPath, URL } from 'node:url';
import open from 'open';
// Resolve paths relative to this module (ESM has no built-in __dirname),
// so the config file is found regardless of the process working directory.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Config lives one directory above this file — presumably the project
// root relative to the compiled output; verify against the build layout.
const CONFIG_FILE = path.join(__dirname, '../spotify-config.json');
// On-disk OAuth configuration (spotify-config.json). The optional tokens
// are written back by the authorization flow after a successful login.
export interface SpotifyConfig {
  clientId: string;
  clientSecret: string;
  redirectUri: string; // must be a localhost URI for the local callback server
  accessToken?: string;
  refreshToken?: string;
}
/**
 * Load and validate the Spotify OAuth configuration from disk.
 *
 * @returns the parsed configuration object.
 * @throws Error when the file is missing, cannot be parsed as JSON, or is
 *   missing any of clientId / clientSecret / redirectUri.
 */
export function loadSpotifyConfig(): SpotifyConfig {
  if (!fs.existsSync(CONFIG_FILE)) {
    throw new Error(
      `Spotify configuration file not found at ${CONFIG_FILE}. Please create one with clientId, clientSecret, and redirectUri.`,
    );
  }
  let config: SpotifyConfig;
  try {
    config = JSON.parse(fs.readFileSync(CONFIG_FILE, 'utf8'));
  } catch (error) {
    throw new Error(
      `Failed to parse Spotify configuration: ${
        error instanceof Error ? error.message : String(error)
      }`,
    );
  }
  // FIX: validate outside the try block. The original threw the validation
  // error inside it, so its own message was caught and re-wrapped as a
  // misleading "Failed to parse Spotify configuration: ..." error.
  if (!config.clientId || !config.clientSecret || !config.redirectUri) {
    throw new Error(
      'Spotify configuration must include clientId, clientSecret, and redirectUri.',
    );
  }
  return config;
}
// Persist the (possibly token-updated) configuration back to disk,
// pretty-printed with 2-space indentation.
export function saveSpotifyConfig(config: SpotifyConfig): void {
  fs.writeFileSync(CONFIG_FILE, JSON.stringify(config, null, 2), 'utf8');
}
// Single cached client so repeated tool calls reuse one authenticated
// session instead of re-reading config and re-constructing the SDK.
let cachedSpotifyApi: SpotifyApi | null = null;
/**
 * Build (or return the cached) Spotify API client.
 *
 * Prefers the user's saved OAuth tokens; otherwise falls back to the
 * client-credentials flow (no user-scoped endpoints available then).
 */
export function createSpotifyApi(): SpotifyApi {
  if (cachedSpotifyApi) {
    return cachedSpotifyApi;
  }
  const config = loadSpotifyConfig();
  if (config.accessToken && config.refreshToken) {
    const accessToken = {
      access_token: config.accessToken,
      token_type: 'Bearer',
      // Real expiry is unknown here; a far-future value is supplied and the
      // refresh token is attached — presumably the SDK uses it when the
      // access token is rejected. TODO confirm against SDK behavior.
      expires_in: 3600 * 24 * 30, // Default to 1 month
      refresh_token: config.refreshToken,
    };
    cachedSpotifyApi = SpotifyApi.withAccessToken(config.clientId, accessToken);
    return cachedSpotifyApi;
  }
  cachedSpotifyApi = SpotifyApi.withClientCredentials(
    config.clientId,
    config.clientSecret,
  );
  return cachedSpotifyApi;
}
/**
 * Generate a cryptographically random alphanumeric string (used as the
 * OAuth `state` value).
 *
 * Uses rejection sampling so every character is uniformly likely: the
 * original `byte % 62` over-selected the first 256 % 62 = 8 characters.
 */
function generateRandomString(length: number): string {
  const charset =
    'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';
  // Largest multiple of 62 representable in a byte; bytes at or above this
  // would bias the modulo, so they are discarded and redrawn.
  const limit = 256 - (256 % charset.length);
  const chars: string[] = [];
  while (chars.length < length) {
    const batch = new Uint8Array(length - chars.length);
    crypto.getRandomValues(batch);
    for (const b of batch) {
      if (b < limit && chars.length < length) {
        chars.push(charset.charAt(b % charset.length));
      }
    }
  }
  return chars.join('');
}
// Encode a UTF-8 string as Base64 (used for the HTTP Basic auth header).
function base64Encode(input: string): string {
  const buffer = Buffer.from(input);
  return buffer.toString('base64');
}
/**
 * Exchange an authorization code for access + refresh tokens.
 *
 * Posts to Spotify's token endpoint using HTTP Basic auth with the client
 * credentials and a form-encoded body, per the authorization-code flow.
 */
async function exchangeCodeForToken(
  code: string,
  config: SpotifyConfig,
): Promise<{ access_token: string; refresh_token: string }> {
  const credentials = base64Encode(`${config.clientId}:${config.clientSecret}`);
  const body = new URLSearchParams({
    grant_type: 'authorization_code',
    code,
    redirect_uri: config.redirectUri,
  });
  const response = await fetch('https://accounts.spotify.com/api/token', {
    method: 'POST',
    headers: {
      Authorization: `Basic ${credentials}`,
      'Content-Type': 'application/x-www-form-urlencoded',
    },
    body,
  });
  if (!response.ok) {
    const errorData = await response.text();
    throw new Error(`Failed to exchange code for token: ${errorData}`);
  }
  const data = await response.json();
  return {
    access_token: data.access_token,
    refresh_token: data.refresh_token,
  };
}
/**
 * Run the OAuth 2.0 authorization-code flow interactively.
 *
 * Spins up a one-shot local HTTP server on the configured localhost
 * redirect URI, opens the browser to Spotify's consent page, validates the
 * callback (state check), exchanges the code for tokens, and saves them to
 * the config file.
 *
 * Exits the process if the redirect URI is not a localhost address.
 * Rejects on user denial, state mismatch, missing code, token-exchange
 * failure, or a server error.
 */
export async function authorizeSpotify(): Promise<void> {
  const config = loadSpotifyConfig();
  const redirectUri = new URL(config.redirectUri);
  // The callback server binds to 127.0.0.1, so only localhost URIs work.
  if (
    redirectUri.hostname !== 'localhost' &&
    redirectUri.hostname !== '127.0.0.1'
  ) {
    console.error(
      'Error: Redirect URI must use localhost for automatic token exchange',
    );
    console.error(
      'Please update your spotify-config.json with a localhost redirect URI',
    );
    console.error('Example: http://127.0.0.1:8888/callback');
    process.exit(1);
  }
  const port = redirectUri.port || '80';
  const callbackPath = redirectUri.pathname || '/callback';
  // Random state value guards the callback against CSRF.
  const state = generateRandomString(16);
  // FIX: deduplicated the scope list — it previously repeated
  // user-modify-playback-state, user-read-playback-state, and
  // user-read-currently-playing.
  const scopes = [
    'user-read-private',
    'user-read-email',
    'user-read-playback-state',
    'user-modify-playback-state',
    'user-read-currently-playing',
    'playlist-read-private',
    'playlist-modify-private',
    'playlist-modify-public',
    'user-library-read',
    'user-library-modify',
    'user-read-recently-played',
  ];
  const authParams = new URLSearchParams({
    client_id: config.clientId,
    response_type: 'code',
    redirect_uri: config.redirectUri,
    scope: scopes.join(' '),
    state: state,
    show_dialog: 'true',
  });
  const authorizationUrl = `https://accounts.spotify.com/authorize?${authParams.toString()}`;
  const authPromise = new Promise<void>((resolve, reject) => {
    // Create HTTP server to handle the callback
    const server = http.createServer(async (req, res) => {
      if (!req.url) {
        return res.end('No URL provided');
      }
      const reqUrl = new URL(req.url, `http://localhost:${port}`);
      if (reqUrl.pathname === callbackPath) {
        const code = reqUrl.searchParams.get('code');
        const returnedState = reqUrl.searchParams.get('state');
        const error = reqUrl.searchParams.get('error');
        res.writeHead(200, { 'Content-Type': 'text/html' });
        // User denied the authorization request (or Spotify errored).
        if (error) {
          console.error(`Authorization error: ${error}`);
          res.end(
            '<html><body><h1>Authentication Failed</h1><p>Please close this window and try again.</p></body></html>',
          );
          server.close();
          reject(new Error(`Authorization failed: ${error}`));
          return;
        }
        // CSRF guard: the state must round-trip unchanged.
        if (returnedState !== state) {
          console.error('State mismatch error');
          res.end(
            '<html><body><h1>Authentication Failed</h1><p>State verification failed. Please close this window and try again.</p></body></html>',
          );
          server.close();
          reject(new Error('State mismatch'));
          return;
        }
        if (!code) {
          console.error('No authorization code received');
          res.end(
            '<html><body><h1>Authentication Failed</h1><p>No authorization code received. Please close this window and try again.</p></body></html>',
          );
          server.close();
          reject(new Error('No authorization code received'));
          return;
        }
        try {
          const tokens = await exchangeCodeForToken(code, config);
          // Persist the tokens so subsequent runs skip the browser flow.
          config.accessToken = tokens.access_token;
          config.refreshToken = tokens.refresh_token;
          saveSpotifyConfig(config);
          res.end(
            '<html><body><h1>Authentication Successful!</h1><p>You can now close this window and return to the application.</p></body></html>',
          );
          console.log(
            'Authentication successful! Access token has been saved.',
          );
          server.close();
          resolve();
        } catch (error) {
          console.error('Token exchange error:', error);
          res.end(
            '<html><body><h1>Authentication Failed</h1><p>Failed to exchange authorization code for tokens. Please close this window and try again.</p></body></html>',
          );
          server.close();
          reject(error);
        }
      } else {
        res.writeHead(404);
        res.end();
      }
    });
    server.listen(Number.parseInt(port), '127.0.0.1', () => {
      console.log(
        `Listening for Spotify authentication callback on port ${port}`,
      );
      console.log('Opening browser for authorization...');
      // Best-effort: if the browser cannot be opened, print the URL instead.
      open(authorizationUrl).catch((error: Error) => {
        console.log(
          'Failed to open browser automatically. Please visit this URL to authorize:',
        );
        console.log(authorizationUrl);
      });
    });
    server.on('error', (error) => {
      console.error(`Server error: ${error.message}`);
      reject(error);
    });
  });
  await authPromise;
}
/**
 * Format a millisecond duration as "m:ss" (e.g. 65000 -> "1:05").
 *
 * FIX: truncates the seconds instead of rounding. The original used
 * `toFixed(0)`, which rounds 59.5+ seconds up to "60" and produced
 * impossible strings like "1:60" for e.g. 119900 ms.
 */
export function formatDuration(ms: number): string {
  const totalSeconds = Math.floor(ms / 1000);
  const minutes = Math.floor(totalSeconds / 60);
  const seconds = totalSeconds % 60;
  return `${minutes}:${String(seconds).padStart(2, '0')}`;
}
/**
 * Run a Spotify Web API call with the shared (cached) client.
 *
 * Wraps every tool's SDK access so client construction and the SDK's
 * spurious JSON-parse errors are handled in one place.
 *
 * NOTE(review): when a JSON-parse error is swallowed below, this returns
 * `undefined as T`, which lies about the declared type — callers must
 * tolerate an undefined result. TODO confirm all call sites do.
 */
export async function handleSpotifyRequest<T>(
  action: (spotifyApi: SpotifyApi) => Promise<T>,
): Promise<T> {
  try {
    const spotifyApi = createSpotifyApi();
    return await action(spotifyApi);
  } catch (error) {
    // Skip JSON parsing errors as these are actually successful operations
    // — presumably endpoints returning empty/non-JSON bodies that the SDK
    // still tries to parse; verify against the SDK version in use.
    const errorMessage = error instanceof Error ? error.message : String(error);
    if (
      errorMessage.includes('Unexpected token') ||
      errorMessage.includes('Unexpected non-whitespace character') ||
      errorMessage.includes('Exponent part is missing a number in JSON')
    ) {
      return undefined as T;
    }
    // Rethrow other errors
    throw error;
  }
}

View file

@ -0,0 +1,22 @@
livekit-agents
livekit-plugins-openai
livekit-plugins-silero
pydantic-ai
python-dotenv
openai
google-api-python-client
google-auth-oauthlib
apscheduler
fastapi
livekit-plugins-deepgram
livekit-plugins-turn-detector
aiohttp
mem0ai
openai-agents-mcp
wandb
torch
jsonlines
markdown
tqdm
# For local editable install of atropos, if desired in requirements:
# atropos @ file:./atropos#egg=atropos

File diff suppressed because it is too large Load diff