Updated livekit 1.0 integration (#3073)
This commit is contained in:
@@ -12,7 +12,12 @@ Before you begin, make sure you have:
|
|||||||
|
|
||||||
1. Installed the LiveKit Agents SDK with the Silero and Deepgram voice dependencies:
|
1. Installed the LiveKit Agents SDK with the Silero and Deepgram voice dependencies:
|
||||||
```bash
|
```bash
|
||||||
pip install livekit-agents[silero,openai,deepgram]
|
pip install livekit livekit-agents \
|
||||||
|
livekit-plugins-silero \
|
||||||
|
livekit-plugins-deepgram \
|
||||||
|
livekit-plugins-openai \
|
||||||
|
livekit-plugins-turn-detector \
|
||||||
|
livekit-plugins-noise-cancellation
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Installed Mem0 SDK:
|
2. Installed Mem0 SDK:
|
||||||
@@ -39,284 +44,123 @@ Let's break down the key components of this implementation using LiveKit Agents:
|
|||||||
### 1. Setting Up Dependencies and Environment
|
### 1. Setting Up Dependencies and Environment
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import asyncio
|
|
||||||
import logging
|
|
||||||
import os
|
import os
|
||||||
from typing import List, Dict, Any, Annotated
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
import aiohttp
|
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
from mem0 import AsyncMemoryClient
|
||||||
|
|
||||||
from livekit.agents import (
|
from livekit.agents import (
|
||||||
|
JobContext,
|
||||||
|
WorkerOptions,
|
||||||
|
cli,
|
||||||
|
ChatContext,
|
||||||
|
ChatMessage,
|
||||||
|
RoomInputOptions,
|
||||||
Agent,
|
Agent,
|
||||||
AgentSession,
|
AgentSession,
|
||||||
AutoSubscribe,
|
|
||||||
JobContext,
|
|
||||||
llm,
|
|
||||||
function_tool,
|
|
||||||
RunContext,
|
|
||||||
cli,
|
|
||||||
WorkerOptions,
|
|
||||||
ModelSettings,
|
|
||||||
)
|
)
|
||||||
from livekit.plugins import deepgram, openai, silero
|
from livekit.plugins import openai, silero, deepgram, noise_cancellation
|
||||||
from livekit.plugins.turn_detector.multilingual import MultilingualModel
|
from livekit.plugins.turn_detector.english import EnglishModel
|
||||||
from mem0 import AsyncMemoryClient
|
|
||||||
|
|
||||||
# Load environment variables
|
# Load environment variables
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
# Configure logging
|
|
||||||
logger = logging.getLogger("memory-assistant")
|
|
||||||
logger.setLevel(logging.INFO)
|
|
||||||
|
|
||||||
# Define a global user ID for simplicity
|
|
||||||
USER_ID = "voice_user"
|
|
||||||
|
|
||||||
# Initialize Mem0 client
|
|
||||||
mem0 = AsyncMemoryClient()
|
|
||||||
```
|
```
|
||||||
|
|
||||||
This section handles:
|
### 2. Mem0 Client and Agent Definition
|
||||||
- Importing required modules
|
|
||||||
- Loading environment variables
|
|
||||||
- Setting up logging
|
|
||||||
- Extracting user identification
|
|
||||||
- Initializing the Mem0 client
|
|
||||||
|
|
||||||
### 2. Memory Enrichment Function
|
|
||||||
|
|
||||||
```python
|
```python
|
||||||
async def _enrich_with_memory(chat_ctx: llm.ChatContext):
|
# User ID for RAG data in Mem0
|
||||||
"""Add memories and augment chat context with relevant memories"""
|
RAG_USER_ID = "livekit-mem0"
|
||||||
if not chat_ctx.messages:
|
mem0_client = AsyncMemoryClient()
|
||||||
return
|
|
||||||
|
|
||||||
# Get the latest user message
|
|
||||||
user_msg = chat_ctx.messages[-1]
|
|
||||||
if user_msg.role != "user":
|
|
||||||
return
|
|
||||||
|
|
||||||
user_content = user_msg.text_content()
|
|
||||||
if not user_content:
|
|
||||||
return
|
|
||||||
|
|
||||||
# Store user message in Mem0
|
|
||||||
await mem0.add(
|
|
||||||
[{"role": "user", "content": user_content}],
|
|
||||||
user_id=USER_ID
|
|
||||||
)
|
|
||||||
|
|
||||||
# Search for relevant memories
|
|
||||||
results = await mem0.search(
|
|
||||||
user_content,
|
|
||||||
user_id=USER_ID,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Augment context with retrieved memories
|
|
||||||
if results:
|
|
||||||
memories = ' '.join([result["memory"] for result in results])
|
|
||||||
logger.info(f"Enriching with memory: {memories}")
|
|
||||||
|
|
||||||
# Add memory context as a assistant message
|
|
||||||
memory_msg = llm.ChatMessage.create(
|
|
||||||
text=f"Relevant Memory: {memories}\n",
|
|
||||||
role="assistant",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Modify chat context with retrieved memories
|
|
||||||
chat_ctx.messages[-1] = memory_msg
|
|
||||||
chat_ctx.messages.append(user_msg)
|
|
||||||
```
|
|
||||||
|
|
||||||
This function:
|
|
||||||
- Stores user messages in Mem0
|
|
||||||
- Performs semantic search for relevant memories
|
|
||||||
- Augments the chat context with retrieved memories
|
|
||||||
- Enables contextually aware responses
|
|
||||||
|
|
||||||
### 3. Prewarm and Entrypoint Functions
|
|
||||||
|
|
||||||
```python
|
|
||||||
def prewarm_process(proc):
|
|
||||||
"""Preload components to speed up session start"""
|
|
||||||
proc.userdata["vad"] = silero.VAD.load()
|
|
||||||
|
|
||||||
async def entrypoint(ctx: JobContext):
|
|
||||||
"""Main entrypoint for the memory-enabled voice agent"""
|
|
||||||
|
|
||||||
# Connect to LiveKit room
|
|
||||||
await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)
|
|
||||||
|
|
||||||
# Create agent session with modern 1.0 architecture
|
|
||||||
session = AgentSession(
|
|
||||||
stt=deepgram.STT(),
|
|
||||||
llm=openai.LLM(model="gpt-4o-mini"),
|
|
||||||
tts=openai.TTS(),
|
|
||||||
vad=silero.VAD.load(),
|
|
||||||
turn_detection=MultilingualModel(),
|
|
||||||
)
|
|
||||||
|
|
||||||
# Create memory-enabled agent
|
|
||||||
agent = MemoryEnabledAgent()
|
|
||||||
|
|
||||||
# Start the session
|
|
||||||
await session.start(
|
|
||||||
room=ctx.room,
|
|
||||||
agent=agent,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Initial greeting
|
|
||||||
await session.generate_reply(
|
|
||||||
instructions="Greet the user warmly as George the travel guide and ask how you can help them plan their next adventure."
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
The entrypoint function:
|
|
||||||
- Connects to LiveKit room
|
|
||||||
- Initializes Mem0 memory client
|
|
||||||
- Creates an agent session using the `AgentSession` orchestrator with memory enrichment
|
|
||||||
- Uses modern turn detection with `MultilingualModel()`
|
|
||||||
- Starts the agent with an initial greeting
|
|
||||||
|
|
||||||
## Create a Memory-Enabled Voice Agent
|
|
||||||
|
|
||||||
Now that we've explained each component, here's the complete implementation that combines the LiveKit Agents SDK for voice with Mem0's memory capabilities:
|
|
||||||
|
|
||||||
```python
|
|
||||||
import asyncio
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
from typing import AsyncIterable, Any
|
|
||||||
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
from livekit.agents import (
|
|
||||||
Agent,
|
|
||||||
AgentSession,
|
|
||||||
JobContext,
|
|
||||||
llm,
|
|
||||||
function_tool,
|
|
||||||
RunContext,
|
|
||||||
cli,
|
|
||||||
WorkerOptions,
|
|
||||||
ModelSettings,
|
|
||||||
)
|
|
||||||
from livekit.plugins import deepgram, openai, silero
|
|
||||||
from livekit.plugins.turn_detector.multilingual import MultilingualModel
|
|
||||||
from mem0 import AsyncMemoryClient
|
|
||||||
|
|
||||||
# Load environment variables
|
|
||||||
load_dotenv()
|
|
||||||
|
|
||||||
# Configure logging
|
|
||||||
logger = logging.getLogger("memory-assistant")
|
|
||||||
logger.setLevel(logging.INFO)
|
|
||||||
|
|
||||||
# Define a global user ID for simplicity
|
|
||||||
USER_ID = "voice_user"
|
|
||||||
|
|
||||||
# Initialize Mem0 memory client
|
|
||||||
mem0 = AsyncMemoryClient()
|
|
||||||
|
|
||||||
class MemoryEnabledAgent(Agent):
|
class MemoryEnabledAgent(Agent):
|
||||||
"""Travel guide agent with Mem0 memory integration"""
|
"""
|
||||||
|
An agent that can answer questions using RAG (Retrieval Augmented Generation) with Mem0.
|
||||||
def __init__(self):
|
"""
|
||||||
|
def __init__(self) -> None:
|
||||||
super().__init__(
|
super().__init__(
|
||||||
instructions="""
|
instructions="""
|
||||||
You are a helpful voice assistant.
|
You are a helpful voice assistant.
|
||||||
You are a travel guide named George and will help the user to plan a travel trip of their dreams.
|
You are a travel guide named George and will help the user to plan a travel trip of their dreams.
|
||||||
You should help the user plan for various adventures like work retreats, family vacations or solo backpacking trips.
|
You should help the user plan for various adventures like work retreats, family vacations or solo backpacking trips.
|
||||||
You should be careful to not suggest anything that would be dangerous, illegal or inappropriate.
|
You should be careful to not suggest anything that would be dangerous, illegal or inappropriate.
|
||||||
You can remember past interactions and use them to inform your answers.
|
You can remember past interactions and use them to inform your answers.
|
||||||
Use semantic memory retrieval to provide contextually relevant responses.
|
Use semantic memory retrieval to provide contextually relevant responses.
|
||||||
"""
|
""",
|
||||||
|
)
|
||||||
|
self._seen_results = set() # Track previously seen result IDs
|
||||||
|
logger.info(f"Mem0 Agent initialized. Using user_id: {RAG_USER_ID}")
|
||||||
|
|
||||||
|
async def on_enter(self):
|
||||||
|
self.session.generate_reply(
|
||||||
|
instructions="Briefly greet the user and offer your assistance."
|
||||||
)
|
)
|
||||||
|
|
||||||
async def llm_node(
|
async def on_user_turn_completed(self, turn_ctx: ChatContext, new_message: ChatMessage) -> None:
|
||||||
self,
|
# Persist the user message in Mem0
|
||||||
chat_ctx: llm.ChatContext,
|
try:
|
||||||
tools: list[llm.FunctionTool],
|
logger.info(f"Adding user message to Mem0: {new_message.text_content}")
|
||||||
model_settings: ModelSettings,
|
add_result = await mem0_client.add(
|
||||||
) -> AsyncIterable[llm.ChatChunk]:
|
[{"role": "user", "content": new_message.text_content}],
|
||||||
"""Override LLM node to add memory enrichment before inference"""
|
user_id=RAG_USER_ID
|
||||||
|
|
||||||
# Enrich context with memory before LLM inference
|
|
||||||
await self._enrich_with_memory(chat_ctx)
|
|
||||||
|
|
||||||
# Call default LLM node with enriched context
|
|
||||||
async for chunk in Agent.default.llm_node(self, chat_ctx, tools, model_settings):
|
|
||||||
yield chunk
|
|
||||||
|
|
||||||
async def _enrich_with_memory(self, chat_ctx: llm.ChatContext):
|
|
||||||
"""Add memories and augment chat context with relevant memories"""
|
|
||||||
if not chat_ctx.messages:
|
|
||||||
return
|
|
||||||
|
|
||||||
# Get the latest user message
|
|
||||||
user_msg = chat_ctx.messages[-1]
|
|
||||||
if user_msg.role != "user":
|
|
||||||
return
|
|
||||||
|
|
||||||
user_content = user_msg.text_content()
|
|
||||||
if not user_content:
|
|
||||||
return
|
|
||||||
|
|
||||||
# Store user message in Mem0
|
|
||||||
await mem0.add(
|
|
||||||
[{"role": "user", "content": user_content}],
|
|
||||||
user_id=USER_ID
|
|
||||||
)
|
|
||||||
|
|
||||||
# Search for relevant memories
|
|
||||||
results = await mem0.search(
|
|
||||||
user_content,
|
|
||||||
user_id=USER_ID,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Augment context with retrieved memories
|
|
||||||
if results:
|
|
||||||
memories = ' '.join([result["memory"] for result in results])
|
|
||||||
logger.info(f"Enriching with memory: {memories}")
|
|
||||||
|
|
||||||
# Add memory context as a assistant message
|
|
||||||
memory_msg = llm.ChatMessage.create(
|
|
||||||
text=f"Relevant Memory: {memories}\n",
|
|
||||||
role="assistant",
|
|
||||||
)
|
)
|
||||||
|
logger.info(f"Mem0 add result (user): {add_result}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to store user message in Mem0: {e}")
|
||||||
|
|
||||||
# Modify chat context with retrieved memories
|
# RAG: Retrieve relevant context from Mem0 and inject as assistant message
|
||||||
chat_ctx.messages[-1] = memory_msg
|
try:
|
||||||
chat_ctx.messages.append(user_msg)
|
logger.info("About to await mem0_client.search for RAG context")
|
||||||
|
search_results = await mem0_client.search(
|
||||||
|
new_message.text_content,
|
||||||
|
user_id=RAG_USER_ID,
|
||||||
|
)
|
||||||
|
logger.info(f"mem0_client.search returned: {search_results}")
|
||||||
|
if search_results and isinstance(search_results, list):
|
||||||
|
context_parts = []
|
||||||
|
for result in search_results:
|
||||||
|
paragraph = result.get("memory") or result.get("text")
|
||||||
|
if paragraph:
|
||||||
|
source = "mem0 Memories"
|
||||||
|
if "from [" in paragraph:
|
||||||
|
source = paragraph.split("from [")[1].split("]")[0]
|
||||||
|
paragraph = paragraph.split("]")[1].strip()
|
||||||
|
context_parts.append(f"Source: {source}\nContent: {paragraph}\n")
|
||||||
|
if context_parts:
|
||||||
|
full_context = "\n\n".join(context_parts)
|
||||||
|
logger.info(f"Injecting RAG context: {full_context}")
|
||||||
|
turn_ctx.add_message(role="assistant", content=full_context)
|
||||||
|
await self.update_chat_ctx(turn_ctx)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to inject RAG context from Mem0: {e}")
|
||||||
|
|
||||||
def prewarm_process(proc):
|
await super().on_user_turn_completed(turn_ctx, new_message)
|
||||||
"""Preload components to speed up session start"""
|
```
|
||||||
proc.userdata["vad"] = silero.VAD.load()
|
|
||||||
|
|
||||||
|
### 3. Entrypoint and Session Setup
|
||||||
|
|
||||||
|
```python
|
||||||
async def entrypoint(ctx: JobContext):
|
async def entrypoint(ctx: JobContext):
|
||||||
"""Main entrypoint for the memory-enabled voice agent"""
|
"""Main entrypoint for the agent."""
|
||||||
|
await ctx.connect()
|
||||||
|
|
||||||
# Connect to LiveKit room
|
|
||||||
await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)
|
|
||||||
|
|
||||||
# Initialize Mem0 client
|
|
||||||
mem0 = AsyncMemoryClient()
|
|
||||||
|
|
||||||
# Create agent session with modern 1.0 architecture
|
|
||||||
session = AgentSession(
|
session = AgentSession(
|
||||||
stt=deepgram.STT(),
|
stt=deepgram.STT(),
|
||||||
llm=openai.LLM(model="gpt-4o-mini"),
|
llm=openai.LLM(model="gpt-4o-mini"),
|
||||||
tts=openai.TTS(),
|
tts=openai.TTS(voice="ash",),
|
||||||
|
turn_detection=EnglishModel(),
|
||||||
vad=silero.VAD.load(),
|
vad=silero.VAD.load(),
|
||||||
turn_detection=MultilingualModel(),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create memory-enabled agent
|
|
||||||
agent = MemoryEnabledAgent()
|
|
||||||
|
|
||||||
# Start the session
|
|
||||||
await session.start(
|
await session.start(
|
||||||
|
agent=MemoryEnabledAgent(),
|
||||||
room=ctx.room,
|
room=ctx.room,
|
||||||
agent=agent,
|
room_input_options=RoomInputOptions(
|
||||||
|
noise_cancellation=noise_cancellation.BVC(),
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Initial greeting
|
# Initial greeting
|
||||||
@@ -327,10 +171,7 @@ async def entrypoint(ctx: JobContext):
|
|||||||
|
|
||||||
# Run the application
|
# Run the application
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
cli.run_app(WorkerOptions(
|
cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))
|
||||||
entrypoint_fnc=entrypoint,
|
|
||||||
prewarm_fnc=prewarm_process
|
|
||||||
))
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Key Features of This Implementation
|
## Key Features of This Implementation
|
||||||
@@ -352,6 +193,11 @@ To run this example:
|
|||||||
```sh
|
```sh
|
||||||
python mem0-livekit-voice-agent.py start
|
python mem0-livekit-voice-agent.py start
|
||||||
```
|
```
|
||||||
|
Alternatively, start your agent in console mode to run it inside your terminal:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
python mem0-livekit-voice-agent.py console
|
||||||
|
```
|
||||||
5. After the script starts, you can interact with the voice agent using [LiveKit's Agents Playground](https://agents-playground.livekit.io/): connect to the agent in order to start a conversation.
|
5. After the script starts, you can interact with the voice agent using [LiveKit's Agents Playground](https://agents-playground.livekit.io/): connect to the agent in order to start a conversation.
|
||||||
|
|
||||||
## Best Practices for Voice Agents with Memory
|
## Best Practices for Voice Agents with Memory
|
||||||
@@ -381,10 +227,14 @@ logging.basicConfig(
|
|||||||
logger = logging.getLogger("memory_voice_agent")
|
logger = logging.getLogger("memory_voice_agent")
|
||||||
```
|
```
|
||||||
|
|
||||||
|
- Check the logs for any issues with API keys, connectivity, or memory operations.
|
||||||
|
- Ensure your `.env` file is correctly configured and loaded.
|
||||||
|
|
||||||
|
|
||||||
## Help & Resources
|
## Help & Resources
|
||||||
|
|
||||||
- [LiveKit Documentation](https://docs.livekit.io/)
|
- [LiveKit Documentation](https://docs.livekit.io/)
|
||||||
- [Mem0 Platform](https://app.mem0.ai/)
|
- [Mem0 Platform](https://app.mem0.ai/)
|
||||||
- Need assistance? Reach out through:
|
- Need assistance? Reach out through:
|
||||||
|
|
||||||
<Snippet file="get-help.mdx" />
|
<Snippet file="get-help.mdx" />
|
||||||
|
|||||||
Reference in New Issue
Block a user