Livekit Docs Update (#2933)
This commit is contained in:
@@ -34,7 +34,7 @@ To view all supported llms, visit the [Supported LLMs](./models).
|
|||||||
<Card title="Gemini" href="/components/llms/models/gemini" />
|
<Card title="Gemini" href="/components/llms/models/gemini" />
|
||||||
<Card title="DeepSeek" href="/components/llms/models/deepseek" />
|
<Card title="DeepSeek" href="/components/llms/models/deepseek" />
|
||||||
<Card title="xAI" href="/components/llms/models/xAI" />
|
<Card title="xAI" href="/components/llms/models/xAI" />
|
||||||
<Card title="XAI" href="/components/llms/models/sarvam" />
|
<Card title="Sarvam AI" href="/components/llms/models/sarvam" />
|
||||||
<Card title="LM Studio" href="/components/llms/models/lmstudio" />
|
<Card title="LM Studio" href="/components/llms/models/lmstudio" />
|
||||||
<Card title="Langchain" href="/components/llms/models/langchain" />
|
<Card title="Langchain" href="/components/llms/models/langchain" />
|
||||||
</CardGroup>
|
</CardGroup>
|
||||||
|
|||||||
@@ -12,8 +12,7 @@ Before you begin, make sure you have:
|
|||||||
|
|
||||||
1. Installed Livekit Agents SDK with voice dependencies of silero and deepgram:
|
1. Installed Livekit Agents SDK with voice dependencies of silero and deepgram:
|
||||||
```bash
|
```bash
|
||||||
pip install livekit \
|
pip install livekit-agents[voice] \
|
||||||
livekit-agents \
|
|
||||||
livekit-plugins-silero \
|
livekit-plugins-silero \
|
||||||
livekit-plugins-deepgram \
|
livekit-plugins-deepgram \
|
||||||
livekit-plugins-openai
|
livekit-plugins-openai
|
||||||
@@ -38,7 +37,7 @@ OPENAI_API_KEY=your_openai_api_key
|
|||||||
|
|
||||||
## Code Breakdown
|
## Code Breakdown
|
||||||
|
|
||||||
Let's break down the key components of this implementation:
|
Let's break down the key components of this implementation using LiveKit Agents:
|
||||||
|
|
||||||
### 1. Setting Up Dependencies and Environment
|
### 1. Setting Up Dependencies and Environment
|
||||||
|
|
||||||
@@ -51,17 +50,19 @@ from typing import List, Dict, Any, Annotated
|
|||||||
import aiohttp
|
import aiohttp
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from livekit.agents import (
|
from livekit.agents import (
|
||||||
|
Agent,
|
||||||
|
AgentSession,
|
||||||
AutoSubscribe,
|
AutoSubscribe,
|
||||||
JobContext,
|
JobContext,
|
||||||
JobProcess,
|
|
||||||
WorkerOptions,
|
|
||||||
cli,
|
|
||||||
llm,
|
llm,
|
||||||
metrics,
|
function_tool,
|
||||||
|
RunContext,
|
||||||
|
cli,
|
||||||
|
WorkerOptions,
|
||||||
|
ModelSettings,
|
||||||
)
|
)
|
||||||
from livekit import rtc, api
|
|
||||||
from livekit.agents.pipeline import VoicePipelineAgent
|
|
||||||
from livekit.plugins import deepgram, openai, silero
|
from livekit.plugins import deepgram, openai, silero
|
||||||
|
from livekit.plugins.turn_detector.multilingual import MultilingualModel
|
||||||
from mem0 import AsyncMemoryClient
|
from mem0 import AsyncMemoryClient
|
||||||
|
|
||||||
# Load environment variables
|
# Load environment variables
|
||||||
@@ -88,21 +89,29 @@ This section handles:
|
|||||||
### 2. Memory Enrichment Function
|
### 2. Memory Enrichment Function
|
||||||
|
|
||||||
```python
|
```python
|
||||||
async def _enrich_with_memory(agent: VoicePipelineAgent, chat_ctx: llm.ChatContext):
|
async def _enrich_with_memory(chat_ctx: llm.ChatContext):
|
||||||
"""Add memories and Augment chat context with relevant memories"""
|
"""Add memories and augment chat context with relevant memories"""
|
||||||
if not chat_ctx.messages:
|
if not chat_ctx.messages:
|
||||||
return
|
return
|
||||||
|
|
||||||
# Store user message in Mem0
|
# Get the latest user message
|
||||||
user_msg = chat_ctx.messages[-1]
|
user_msg = chat_ctx.messages[-1]
|
||||||
|
if user_msg.role != "user":
|
||||||
|
return
|
||||||
|
|
||||||
|
user_content = user_msg.text_content()
|
||||||
|
if not user_content:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Store user message in Mem0
|
||||||
await mem0.add(
|
await mem0.add(
|
||||||
[{"role": "user", "content": user_msg.content}],
|
[{"role": "user", "content": user_content}],
|
||||||
user_id=USER_ID
|
user_id=USER_ID
|
||||||
)
|
)
|
||||||
|
|
||||||
# Search for relevant memories
|
# Search for relevant memories
|
||||||
results = await mem0.search(
|
results = await mem0.search(
|
||||||
user_msg.content,
|
user_content,
|
||||||
user_id=USER_ID,
|
user_id=USER_ID,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -111,13 +120,14 @@ async def _enrich_with_memory(agent: VoicePipelineAgent, chat_ctx: llm.ChatConte
|
|||||||
memories = ' '.join([result["memory"] for result in results])
|
memories = ' '.join([result["memory"] for result in results])
|
||||||
logger.info(f"Enriching with memory: {memories}")
|
logger.info(f"Enriching with memory: {memories}")
|
||||||
|
|
||||||
rag_msg = llm.ChatMessage.create(
|
# Add memory context as an assistant message
|
||||||
|
memory_msg = llm.ChatMessage.create(
|
||||||
text=f"Relevant Memory: {memories}\n",
|
text=f"Relevant Memory: {memories}\n",
|
||||||
role="assistant",
|
role="assistant",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Modify chat context with retrieved memories
|
# Modify chat context with retrieved memories
|
||||||
chat_ctx.messages[-1] = rag_msg
|
chat_ctx.messages[-1] = memory_msg
|
||||||
chat_ctx.messages.append(user_msg)
|
chat_ctx.messages.append(user_msg)
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -130,62 +140,45 @@ This function:
|
|||||||
### 3. Prewarm and Entrypoint Functions
|
### 3. Prewarm and Entrypoint Functions
|
||||||
|
|
||||||
```python
|
```python
|
||||||
def prewarm_process(proc: JobProcess):
|
def prewarm_process(proc):
|
||||||
# Preload silero VAD in memory to speed up session start
|
"""Preload components to speed up session start"""
|
||||||
proc.userdata["vad"] = silero.VAD.load()
|
proc.userdata["vad"] = silero.VAD.load()
|
||||||
|
|
||||||
async def entrypoint(ctx: JobContext):
|
async def entrypoint(ctx: JobContext):
|
||||||
|
"""Main entrypoint for the memory-enabled voice agent"""
|
||||||
|
|
||||||
# Connect to LiveKit room
|
# Connect to LiveKit room
|
||||||
await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)
|
await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)
|
||||||
|
|
||||||
# Wait for participant
|
# Create agent session with modern 1.0 architecture
|
||||||
participant = await ctx.wait_for_participant()
|
session = AgentSession(
|
||||||
|
|
||||||
# Initialize Mem0 client
|
|
||||||
mem0 = AsyncMemoryClient()
|
|
||||||
|
|
||||||
# Define initial system context
|
|
||||||
initial_ctx = llm.ChatContext().append(
|
|
||||||
role="system",
|
|
||||||
text=(
|
|
||||||
"""
|
|
||||||
You are a helpful voice assistant.
|
|
||||||
You are a travel guide named George and will help the user to plan a travel trip of their dreams.
|
|
||||||
You should help the user plan for various adventures like work retreats, family vacations or solo backpacking trips.
|
|
||||||
You should be careful to not suggest anything that would be dangerous, illegal or inappropriate.
|
|
||||||
You can remember past interactions and use them to inform your answers.
|
|
||||||
Use semantic memory retrieval to provide contextually relevant responses.
|
|
||||||
"""
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
# Create VoicePipelineAgent with memory capabilities
|
|
||||||
agent = VoicePipelineAgent(
|
|
||||||
chat_ctx=initial_ctx,
|
|
||||||
vad=silero.VAD.load(),
|
|
||||||
stt=deepgram.STT(),
|
stt=deepgram.STT(),
|
||||||
llm=openai.LLM(model="gpt-4o-mini"),
|
llm=openai.LLM(model="gpt-4o-mini"),
|
||||||
tts=openai.TTS(),
|
tts=openai.TTS(),
|
||||||
before_llm_cb=_enrich_with_memory,
|
vad=silero.VAD.load(),
|
||||||
|
turn_detection=MultilingualModel(),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Start agent and initial greeting
|
# Create memory-enabled agent
|
||||||
agent.start(ctx.room, participant)
|
agent = MemoryEnabledAgent()
|
||||||
await agent.say(
|
|
||||||
"Hello! I'm George. Can I help you plan an upcoming trip? ",
|
# Start the session
|
||||||
allow_interruptions=True
|
await session.start(
|
||||||
|
room=ctx.room,
|
||||||
|
agent=agent,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Run the application
|
# Initial greeting
|
||||||
if __name__ == "__main__":
|
await session.generate_reply(
|
||||||
cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint, prewarm_fnc=prewarm_process))
|
instructions="Greet the user warmly as George the travel guide and ask how you can help them plan their next adventure."
|
||||||
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
The entrypoint function:
|
The entrypoint function:
|
||||||
- Connects to LiveKit room
|
- Connects to LiveKit room
|
||||||
- Initializes Mem0 memory client
|
- Initializes Mem0 memory client
|
||||||
- Sets up initial system context
|
- Creates an agent session using the `AgentSession` orchestrator with memory enrichment
|
||||||
- Creates a VoicePipelineAgent with memory enrichment
|
- Uses modern turn detection with `MultilingualModel()`
|
||||||
- Starts the agent with an initial greeting
|
- Starts the agent with an initial greeting
|
||||||
|
|
||||||
## Create a Memory-Enabled Voice Agent
|
## Create a Memory-Enabled Voice Agent
|
||||||
@@ -196,22 +189,22 @@ Now that we've explained each component, here's the complete implementation that
|
|||||||
import asyncio
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
from typing import List, Dict, Any, Annotated
|
from typing import AsyncIterable, Any
|
||||||
|
|
||||||
import aiohttp
|
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from livekit.agents import (
|
from livekit.agents import (
|
||||||
AutoSubscribe,
|
Agent,
|
||||||
|
AgentSession,
|
||||||
JobContext,
|
JobContext,
|
||||||
JobProcess,
|
|
||||||
WorkerOptions,
|
|
||||||
cli,
|
|
||||||
llm,
|
llm,
|
||||||
metrics,
|
function_tool,
|
||||||
|
RunContext,
|
||||||
|
cli,
|
||||||
|
WorkerOptions,
|
||||||
|
ModelSettings,
|
||||||
)
|
)
|
||||||
from livekit import rtc, api
|
|
||||||
from livekit.agents.pipeline import VoicePipelineAgent
|
|
||||||
from livekit.plugins import deepgram, openai, silero
|
from livekit.plugins import deepgram, openai, silero
|
||||||
|
from livekit.plugins.turn_detector.multilingual import MultilingualModel
|
||||||
from mem0 import AsyncMemoryClient
|
from mem0 import AsyncMemoryClient
|
||||||
|
|
||||||
# Load environment variables
|
# Load environment variables
|
||||||
@@ -227,32 +220,59 @@ USER_ID = "voice_user"
|
|||||||
# Initialize Mem0 memory client
|
# Initialize Mem0 memory client
|
||||||
mem0 = AsyncMemoryClient()
|
mem0 = AsyncMemoryClient()
|
||||||
|
|
||||||
def prewarm_process(proc: JobProcess):
|
class MemoryEnabledAgent(Agent):
|
||||||
# Preload silero VAD in memory to speed up session start
|
"""Travel guide agent with Mem0 memory integration"""
|
||||||
proc.userdata["vad"] = silero.VAD.load()
|
|
||||||
|
|
||||||
async def entrypoint(ctx: JobContext):
|
def __init__(self):
|
||||||
# Connect to LiveKit room
|
super().__init__(
|
||||||
await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)
|
instructions="""
|
||||||
|
You are a helpful voice assistant.
|
||||||
|
You are a travel guide named George and will help the user to plan a travel trip of their dreams.
|
||||||
|
You should help the user plan for various adventures like work retreats, family vacations or solo backpacking trips.
|
||||||
|
You should be careful to not suggest anything that would be dangerous, illegal or inappropriate.
|
||||||
|
You can remember past interactions and use them to inform your answers.
|
||||||
|
Use semantic memory retrieval to provide contextually relevant responses.
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
# Wait for participant
|
async def llm_node(
|
||||||
participant = await ctx.wait_for_participant()
|
self,
|
||||||
|
chat_ctx: llm.ChatContext,
|
||||||
|
tools: list[llm.FunctionTool],
|
||||||
|
model_settings: ModelSettings,
|
||||||
|
) -> AsyncIterable[llm.ChatChunk]:
|
||||||
|
"""Override LLM node to add memory enrichment before inference"""
|
||||||
|
|
||||||
async def _enrich_with_memory(agent: VoicePipelineAgent, chat_ctx: llm.ChatContext):
|
# Enrich context with memory before LLM inference
|
||||||
"""Add memories and Augment chat context with relevant memories"""
|
await self._enrich_with_memory(chat_ctx)
|
||||||
|
|
||||||
|
# Call default LLM node with enriched context
|
||||||
|
async for chunk in Agent.default.llm_node(self, chat_ctx, tools, model_settings):
|
||||||
|
yield chunk
|
||||||
|
|
||||||
|
async def _enrich_with_memory(self, chat_ctx: llm.ChatContext):
|
||||||
|
"""Add memories and augment chat context with relevant memories"""
|
||||||
if not chat_ctx.messages:
|
if not chat_ctx.messages:
|
||||||
return
|
return
|
||||||
|
|
||||||
# Store user message in Mem0
|
# Get the latest user message
|
||||||
user_msg = chat_ctx.messages[-1]
|
user_msg = chat_ctx.messages[-1]
|
||||||
|
if user_msg.role != "user":
|
||||||
|
return
|
||||||
|
|
||||||
|
user_content = user_msg.text_content()
|
||||||
|
if not user_content:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Store user message in Mem0
|
||||||
await mem0.add(
|
await mem0.add(
|
||||||
[{"role": "user", "content": user_msg.content}],
|
[{"role": "user", "content": user_content}],
|
||||||
user_id=USER_ID
|
user_id=USER_ID
|
||||||
)
|
)
|
||||||
|
|
||||||
# Search for relevant memories
|
# Search for relevant memories
|
||||||
results = await mem0.search(
|
results = await mem0.search(
|
||||||
user_msg.content,
|
user_content,
|
||||||
user_id=USER_ID,
|
user_id=USER_ID,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -261,58 +281,68 @@ async def entrypoint(ctx: JobContext):
|
|||||||
memories = ' '.join([result["memory"] for result in results])
|
memories = ' '.join([result["memory"] for result in results])
|
||||||
logger.info(f"Enriching with memory: {memories}")
|
logger.info(f"Enriching with memory: {memories}")
|
||||||
|
|
||||||
rag_msg = llm.ChatMessage.create(
|
# Add memory context as an assistant message
|
||||||
|
memory_msg = llm.ChatMessage.create(
|
||||||
text=f"Relevant Memory: {memories}\n",
|
text=f"Relevant Memory: {memories}\n",
|
||||||
role="assistant",
|
role="assistant",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Modify chat context with retrieved memories
|
# Modify chat context with retrieved memories
|
||||||
chat_ctx.messages[-1] = rag_msg
|
chat_ctx.messages[-1] = memory_msg
|
||||||
chat_ctx.messages.append(user_msg)
|
chat_ctx.messages.append(user_msg)
|
||||||
|
|
||||||
# Define initial system context
|
def prewarm_process(proc):
|
||||||
initial_ctx = llm.ChatContext().append(
|
"""Preload components to speed up session start"""
|
||||||
role="system",
|
proc.userdata["vad"] = silero.VAD.load()
|
||||||
text=(
|
|
||||||
"""
|
|
||||||
You are a helpful voice assistant.
|
|
||||||
You are a travel guide named George and will help the user to plan a travel trip of their dreams.
|
|
||||||
You should help the user plan for various adventures like work retreats, family vacations or solo backpacking trips.
|
|
||||||
You should be careful to not suggest anything that would be dangerous, illegal or inappropriate.
|
|
||||||
You can remember past interactions and use them to inform your answers.
|
|
||||||
Use semantic memory retrieval to provide contextually relevant responses.
|
|
||||||
"""
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
# Create VoicePipelineAgent with memory capabilities
|
async def entrypoint(ctx: JobContext):
|
||||||
agent = VoicePipelineAgent(
|
"""Main entrypoint for the memory-enabled voice agent"""
|
||||||
chat_ctx=initial_ctx,
|
|
||||||
vad=silero.VAD.load(),
|
# Connect to LiveKit room
|
||||||
|
await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)
|
||||||
|
|
||||||
|
# Initialize Mem0 client
|
||||||
|
mem0 = AsyncMemoryClient()
|
||||||
|
|
||||||
|
# Create agent session with modern 1.0 architecture
|
||||||
|
session = AgentSession(
|
||||||
stt=deepgram.STT(),
|
stt=deepgram.STT(),
|
||||||
llm=openai.LLM(model="gpt-4o-mini"),
|
llm=openai.LLM(model="gpt-4o-mini"),
|
||||||
tts=openai.TTS(),
|
tts=openai.TTS(),
|
||||||
before_llm_cb=_enrich_with_memory,
|
vad=silero.VAD.load(),
|
||||||
|
turn_detection=MultilingualModel(),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Start agent and initial greeting
|
# Create memory-enabled agent
|
||||||
agent.start(ctx.room, participant)
|
agent = MemoryEnabledAgent()
|
||||||
await agent.say(
|
|
||||||
"Hello! I'm George. Can I help you plan an upcoming trip? ",
|
# Start the session
|
||||||
|
await session.start(
|
||||||
|
room=ctx.room,
|
||||||
|
agent=agent,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Initial greeting
|
||||||
|
await session.generate_reply(
|
||||||
|
instructions="Greet the user warmly as George the travel guide and ask how you can help them plan their next adventure.",
|
||||||
allow_interruptions=True
|
allow_interruptions=True
|
||||||
)
|
)
|
||||||
|
|
||||||
# Run the application
|
# Run the application
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint, prewarm_fnc=prewarm_process))
|
cli.run_app(WorkerOptions(
|
||||||
|
entrypoint_fnc=entrypoint,
|
||||||
|
prewarm_fnc=prewarm_process
|
||||||
|
))
|
||||||
```
|
```
|
||||||
|
|
||||||
## Key Features of This Implementation
|
## Key Features of This Implementation
|
||||||
|
|
||||||
1. **Semantic Memory Retrieval**: Uses Mem0 to store and retrieve contextually relevant memories
|
1. **Semantic Memory Retrieval**: Uses Mem0 to store and retrieve contextually relevant memories
|
||||||
2. **Voice Interaction**: Leverages LiveKit for voice communication
|
2. **Voice Interaction**: Leverages LiveKit for voice communication with proper turn detection
|
||||||
3. **Intelligent Context Management**: Augments conversations with past interactions
|
3. **Intelligent Context Management**: Augments conversations with past interactions
|
||||||
4. **Travel Planning Specialization**: Focused on creating a helpful travel guide assistant
|
4. **Travel Planning Specialization**: Focused on creating a helpful travel guide assistant
|
||||||
|
5. **Function Tools**: Modern tool definition for enhanced capabilities
|
||||||
|
|
||||||
## Running the Example
|
## Running the Example
|
||||||
|
|
||||||
@@ -325,13 +355,13 @@ To run this example:
|
|||||||
```sh
|
```sh
|
||||||
python mem0-livekit-voice-agent.py start
|
python mem0-livekit-voice-agent.py start
|
||||||
```
|
```
|
||||||
5. After the script starts, you can interact with the voice agent using [Livekit's Agent Platform](https://agents-playground.livekit.io/) and Connect to the agent inorder to start conversations.
|
5. After the script starts, you can interact with the voice agent using [LiveKit's Agent Platform](https://agents-playground.livekit.io/) and connect to the agent in order to start conversations.
|
||||||
|
|
||||||
## Best Practices for Voice Agents with Memory
|
## Best Practices for Voice Agents with Memory
|
||||||
|
|
||||||
1. **Context Preservation**: Store enough context with each memory for effective retrieval
|
1. **Context Preservation**: Store enough context with each memory for effective retrieval
|
||||||
2. **Privacy Considerations**: Implement secure memory management
|
2. **Privacy Considerations**: Implement secure memory management
|
||||||
3. **Relevant Memory Filtering**: Use semantic search to retrieve only the most pertinent memories
|
3. **Relevant Memory Filtering**: Use semantic search to retrieve only the most relevant memories
|
||||||
4. **Error Handling**: Implement robust error handling for memory operations
|
4. **Error Handling**: Implement robust error handling for memory operations
|
||||||
|
|
||||||
## Debugging Function Tools
|
## Debugging Function Tools
|
||||||
|
|||||||
Reference in New Issue
Block a user