atropos/environments/community/router_env/engine/agents/agent_mcp_mvp.py
2025-05-23 13:56:05 +10:00

61 lines
1.9 KiB
Python

import logging
import os
from pathlib import Path
from dotenv import load_dotenv
from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions, cli, mcp
from livekit.plugins import deepgram, openai, silero
from livekit.plugins.turn_detector.multilingual import MultilingualModel
# Environment variables are read from two candidate .env files, in this order:
#   1. two directories above the directory containing this module,
#   2. the parent of the directory containing this module.
# python-dotenv does not override variables that are already set by default,
# so values from the first file take precedence over the second.
_module_dir = os.path.dirname(__file__)
load_dotenv(os.path.join(_module_dir, "..", "..", ".env"))

# Module-level logger; the name matches the worker's agent name.
logger = logging.getLogger("mcp-agent")

_parent_env_file = Path(__file__).parent.parent / ".env"
load_dotenv(dotenv_path=_parent_env_file)
class MyAgent(Agent):
    """Voice agent that can call tools exposed by a remote MCP server.

    The agent is built with Silero VAD, Deepgram STT, an OpenAI LLM and TTS
    voice, the multilingual turn detector, and a single HTTP MCP server.
    """

    # NOTE(security): this URL appears to embed an access credential in its
    # path. Prefer supplying ``mcp_url`` from configuration, and consider
    # rotating and removing this default instead of keeping it in source.
    DEFAULT_MCP_URL = (
        "https://mcp.gumloop.com/gcalendar/"
        "cY3bcaFS1qNdeVBnj0XIhnP4FEp2%3Aae99858e75594251bea9e05f32bb99b3"
    )

    def __init__(self, *, mcp_url: str = DEFAULT_MCP_URL) -> None:
        """Configure the speech pipeline and register the MCP server.

        Args:
            mcp_url: Endpoint of the MCP server to attach. Defaults to
                ``DEFAULT_MCP_URL`` so that ``MyAgent()`` keeps working
                exactly as before.

        Raises:
            ValueError: If ``mcp_url`` is empty.
        """
        if not mcp_url:
            raise ValueError("mcp_url must be a non-empty URL")

        super().__init__(
            instructions=(
                "You can retrieve data via the MCP server. The interface is voice-based: "
                "accept spoken user queries and respond with synthesized speech."
            ),
            vad=silero.VAD.load(),
            stt=deepgram.STT(model="nova-3", language="multi"),
            llm=openai.LLM(model="gpt-4o-mini"),
            tts=openai.TTS(voice="ash"),
            turn_detection=MultilingualModel(),
            mcp_servers=[
                mcp.MCPServerHTTP(
                    url=mcp_url,
                    # Short timeouts (seconds) so an unreachable MCP server
                    # does not stall the voice interaction for long.
                    timeout=5,
                    client_session_timeout_seconds=5,
                ),
            ],
        )

    async def on_enter(self) -> None:
        """Ask the session to generate a reply when this hook is invoked.

        The result of ``generate_reply()`` is not awaited or stored, so this
        hook returns without waiting for the reply to finish.
        """
        self.session.generate_reply()
async def entrypoint(ctx: JobContext):
    """Worker job entrypoint: connect, build a session, and start the agent.

    Args:
        ctx: Job context supplied by the worker; provides the connection
            and the room the agent session is started in.
    """
    await ctx.connect()

    # Session-level speech pipeline, constructed in the same order as the
    # keyword arguments are listed.
    pipeline = {
        "vad": silero.VAD.load(),
        "stt": deepgram.STT(model="nova-3", language="multi"),
        "llm": openai.LLM(model="gpt-4o-mini"),
        "tts": openai.TTS(voice="ash"),
        "turn_detection": MultilingualModel(),
    }
    session = AgentSession(**pipeline)

    agent = MyAgent()
    await session.start(agent=agent, room=ctx.room)
if __name__ == "__main__":
    # Run the worker CLI with this module's entrypoint, registered under the
    # explicit agent name "mcp-agent".
    worker_options = WorkerOptions(entrypoint_fnc=entrypoint, agent_name="mcp-agent")
    cli.run_app(worker_options)