From 393a4fd5a6cfeb754857a2229726f567a9fadf36 Mon Sep 17 00:00:00 2001
From: Prateek Chhikara <46902268+prateekchhikara@users.noreply.github.com>
Date: Tue, 29 Apr 2025 08:15:25 -0700
Subject: [PATCH] Docs Update (#2591)
---
README.md | 161 +++++--------
docs/_snippets/paper-release.mdx | 3 +
docs/api-reference.mdx | 2 +
docs/changelog.mdx | 2 +
docs/components/embedders/config.mdx | 2 +
docs/components/embedders/overview.mdx | 2 +
docs/components/llms/config.mdx | 2 +
docs/components/llms/models/anthropic.mdx | 2 +
docs/components/llms/models/aws_bedrock.mdx | 2 +
docs/components/llms/models/azure_openai.mdx | 2 +
docs/components/llms/models/deepseek.mdx | 2 +
docs/components/llms/models/gemini.mdx | 2 +
docs/components/llms/models/google_AI.mdx | 2 +
docs/components/llms/models/groq.mdx | 2 +
docs/components/llms/models/langchain.mdx | 2 +
docs/components/llms/models/litellm.mdx | 2 +
docs/components/llms/models/lmstudio.mdx | 2 +
docs/components/llms/models/mistral_AI.mdx | 2 +
docs/components/llms/models/ollama.mdx | 2 +
docs/components/llms/models/openai.mdx | 2 +
docs/components/llms/models/together.mdx | 2 +
docs/components/llms/models/xAI.mdx | 2 +
docs/components/llms/overview.mdx | 2 +
docs/components/vectordbs/config.mdx | 2 +
docs/components/vectordbs/overview.mdx | 2 +
docs/contributing/development.mdx | 2 +
docs/contributing/documentation.mdx | 2 +
docs/core-concepts/memory-operations.mdx | 2 +
docs/core-concepts/memory-types.mdx | 3 +
docs/examples.mdx | 2 +
docs/examples/ai_companion.mdx | 2 +
docs/examples/ai_companion_js.mdx | 2 +
docs/examples/chrome-extension.mdx | 2 +
docs/examples/customer-support-agent.mdx | 2 +
docs/examples/document-writing.mdx | 1 +
docs/examples/email_processing.mdx | 2 +
docs/examples/llama-index-mem0.mdx | 2 +
docs/examples/mem0-agentic-tool.mdx | 2 +
docs/examples/mem0-demo.mdx | 3 +
docs/examples/mem0-mastra.mdx | 2 +
docs/examples/mem0-openai-voice-demo.mdx | 2 +
docs/examples/mem0-with-ollama.mdx | 2 +
docs/examples/multimodal-demo.mdx | 2 +
docs/examples/openai-inbuilt-tools.mdx | 2 +
docs/examples/personal-ai-tutor.mdx | 2 +
docs/examples/personal-travel-assistant.mdx | 3 +
docs/examples/personalized-deep-research.mdx | 2 +
docs/examples/youtube-assistant.mdx | 2 +
docs/faqs.mdx | 1 +
docs/features.mdx | 2 +
docs/features/advanced-retrieval.mdx | 2 +
docs/features/async-client.mdx | 1 +
docs/features/contextual-add.mdx | 2 +
docs/features/custom-categories.mdx | 2 +
.../custom-fact-extraction-prompt.mdx | 2 +
docs/features/custom-instructions.mdx | 2 +
docs/features/custom-update-memory-prompt.mdx | 5 +-
docs/features/direct-import.mdx | 2 +
docs/features/expiration-date.mdx | 2 +
docs/features/feedback-mechanism.mdx | 2 +
docs/features/graph-memory.mdx | 2 +
docs/features/memory-export.mdx | 2 +
docs/features/multimodal-support.mdx | 2 +
docs/features/openai_compatibility.mdx | 2 +
docs/features/platform-overview.mdx | 2 +
docs/features/selective-memory.mdx | 2 +
docs/features/timestamp.mdx | 2 +
docs/features/webhooks.mdx | 2 +
docs/integrations.mdx | 2 +
docs/integrations/agno.mdx | 1 +
docs/integrations/autogen.mdx | 2 +
docs/integrations/crewai.mdx | 2 +
docs/integrations/dify.mdx | 2 +
docs/integrations/elevenlabs.mdx | 2 +
docs/integrations/flowise.mdx | 2 +
docs/integrations/keywords.mdx | 2 +
docs/integrations/langchain-tools.mdx | 2 +
docs/integrations/langchain.mdx | 2 +
docs/integrations/langgraph.mdx | 2 +
docs/integrations/livekit.mdx | 2 +
docs/integrations/llama-index.mdx | 2 +
docs/integrations/mcp-server.mdx | 2 +
docs/integrations/multion.mdx | 2 +
docs/integrations/pipecat.mdx | 2 +
docs/integrations/vercel-ai-sdk.mdx | 2 +
docs/open-source/graph_memory/features.mdx | 2 +
docs/open-source/graph_memory/overview.mdx | 2 +
docs/open-source/multimodal-support.mdx | 2 +
docs/open-source/node-quickstart.mdx | 2 +
docs/open-source/python-quickstart.mdx | 2 +
docs/open-source/quickstart.mdx | 2 +
docs/overview.mdx | 2 +
docs/platform/overview.mdx | 2 +
docs/platform/quickstart.mdx | 2 +
docs/quickstart.mdx | 4 +
evaluation/Makefile | 31 +++
evaluation/README.md | 192 +++++++++++++++
evaluation/evals.py | 81 +++++++
evaluation/generate_scores.py | 41 ++++
evaluation/metrics/llm_judge.py | 127 ++++++++++
evaluation/metrics/utils.py | 224 ++++++++++++++++++
evaluation/prompts.py | 147 ++++++++++++
evaluation/run_experiments.py | 102 ++++++++
evaluation/src/langmem.py | 193 +++++++++++++++
evaluation/src/memzero/add.py | 141 +++++++++++
evaluation/src/memzero/search.py | 189 +++++++++++++++
evaluation/src/openai/predict.py | 143 +++++++++++
evaluation/src/rag.py | 197 +++++++++++++++
evaluation/src/utils.py | 12 +
evaluation/src/zep/add.py | 73 ++++++
evaluation/src/zep/search.py | 148 ++++++++++++
111 files changed, 2296 insertions(+), 99 deletions(-)
create mode 100644 docs/_snippets/paper-release.mdx
create mode 100644 evaluation/Makefile
create mode 100644 evaluation/evals.py
create mode 100644 evaluation/generate_scores.py
create mode 100644 evaluation/metrics/llm_judge.py
create mode 100644 evaluation/metrics/utils.py
create mode 100644 evaluation/prompts.py
create mode 100644 evaluation/run_experiments.py
create mode 100644 evaluation/src/langmem.py
create mode 100644 evaluation/src/memzero/add.py
create mode 100644 evaluation/src/memzero/search.py
create mode 100644 evaluation/src/openai/predict.py
create mode 100644 evaluation/src/rag.py
create mode 100644 evaluation/src/utils.py
create mode 100644 evaluation/src/zep/add.py
create mode 100644 evaluation/src/zep/search.py
diff --git a/README.md b/README.md
index 2de88672..ac2bba6d 100644
--- a/README.md
+++ b/README.md
@@ -1,24 +1,20 @@
-
+
+
-
-
-
-
-
+
+
-
-
- Learn more
- ·
- Join Discord
- ·
- Demo
-
+
+ Learn more
+ ·
+ Join Discord
+ ·
+ Demo
@@ -26,55 +22,71 @@
-
+
-
-
-
-
-
-
+
+
+
+
+
+
+
+ 📄 Building Production-Ready AI Agents with Scalable Long-Term Memory →
+
+
+ ⚡ +26% Accuracy vs. OpenAI Memory • 🚀 91% Faster • 💰 90% Fewer Tokens
+
+
+## 🔥 Research Highlights
+- **+26% Accuracy** over OpenAI Memory on the LOCOMO benchmark
+- **91% Faster Responses** than full-context, ensuring low latency at scale
+- **90% Lower Token Usage** than full-context, cutting costs without compromise
+- [Read the full paper](https://mem0.ai/research)
# Introduction
-[Mem0](https://mem0.ai) (pronounced as "mem-zero") enhances AI assistants and agents with an intelligent memory layer, enabling personalized AI interactions. Mem0 remembers user preferences, adapts to individual needs, and continuously improves over time, making it ideal for customer support chatbots, AI assistants, and autonomous systems.
+[Mem0](https://mem0.ai) ("mem-zero") enhances AI assistants and agents with an intelligent memory layer, enabling personalized AI interactions. It remembers user preferences, adapts to individual needs, and continuously learns over time—ideal for customer support chatbots, AI assistants, and autonomous systems.
-### Features & Use Cases
+### Key Features & Use Cases
-Core Capabilities:
-- **Multi-Level Memory**: User, Session, and AI Agent memory retention with adaptive personalization
-- **Developer-Friendly**: Simple API integration, cross-platform consistency, and hassle-free managed service
+**Core Capabilities:**
+- **Multi-Level Memory**: Seamlessly retains User, Session, and Agent state with adaptive personalization
+- **Developer-Friendly**: Intuitive API, cross-platform SDKs, and a fully managed service option
-Applications:
-- **AI Assistants**: Seamless conversations with context and personalization
-- **Learning & Support**: Tailored content recommendations and context-aware customer assistance
-- **Healthcare & Companions**: Patient history tracking and deeper relationship building
-- **Productivity & Gaming**: Streamlined workflows and adaptive environments based on user behavior
+**Applications:**
+- **AI Assistants**: Consistent, context-rich conversations
+- **Customer Support**: Recall past tickets and user history for tailored help
+- **Healthcare**: Track patient preferences and history for personalized care
+- **Productivity & Gaming**: Adaptive workflows and environments based on user behavior
-## Get Started
+## 🚀 Quickstart Guide
-Get started quickly with [Mem0 Platform](https://app.mem0.ai) - our fully managed solution that provides automatic updates, advanced analytics, enterprise security, and dedicated support. [Create a free account](https://app.mem0.ai) to begin.
+Choose between our hosted platform and our self-hosted package:
-For complete control, you can self-host Mem0 using our open-source package. See the [Quickstart guide](#quickstart) below to set up your own instance.
+### Hosted Platform
-## Quickstart Guide
+Get up and running in minutes with automatic updates, analytics, and enterprise security.
-Install the Mem0 package via pip:
+1. Sign up on [Mem0 Platform](https://app.mem0.ai)
+2. Embed the memory layer via SDK or API keys
+
+### Self-Hosted (Open Source)
+
+Install the SDK via pip:
```bash
pip install mem0ai
```
-Install the Mem0 package via npm:
-
+Install the SDK via npm:
```bash
npm install mem0ai
```
@@ -96,7 +108,7 @@ def chat_with_memories(message: str, user_id: str = "default_user") -> str:
# Retrieve relevant memories
relevant_memories = memory.search(query=message, user_id=user_id, limit=3)
memories_str = "\n".join(f"- {entry['memory']}" for entry in relevant_memories["results"])
-
+
# Generate Assistant response
system_prompt = f"You are a helpful AI. Answer the question based on query and memories.\nUser Memories:\n{memories_str}"
messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": message}]
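For readers scanning the hunks, a minimal, self-contained sketch of the full `chat_with_memories` flow that the two hunks above touch, assuming the `openai` and `mem0ai` Python packages; the model name and the memory-storage step are illustrative rather than quoted from the patch:

```python
# Hedged sketch of the quickstart flow; only the search/prompt lines above come from the patch.
from openai import OpenAI
from mem0 import Memory

openai_client = OpenAI()
memory = Memory()

def chat_with_memories(message: str, user_id: str = "default_user") -> str:
    # Retrieve relevant memories for this user
    relevant_memories = memory.search(query=message, user_id=user_id, limit=3)
    memories_str = "\n".join(f"- {entry['memory']}" for entry in relevant_memories["results"])

    # Generate the assistant response with the memories as context
    system_prompt = f"You are a helpful AI. Answer the question based on query and memories.\nUser Memories:\n{memories_str}"
    messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": message}]
    response = openai_client.chat.completions.create(model="gpt-4o-mini", messages=messages)  # model name assumed
    assistant_response = response.choices[0].message.content

    # Store the exchange so later turns can recall it
    messages.append({"role": "assistant", "content": assistant_response})
    memory.add(messages, user_id=user_id)
    return assistant_response
```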
@@ -122,68 +134,21 @@ if __name__ == "__main__":
main()
```
-See the example for [Node.js](https://docs.mem0.ai/examples/ai_companion_js).
+For detailed integration steps, see the [Quickstart](https://docs.mem0.ai/quickstart) and [API Reference](https://docs.mem0.ai).
-For more advanced usage and API documentation, visit our [documentation](https://docs.mem0.ai).
+## 🔗 Integrations & Demos
-> [!TIP]
-> For a hassle-free experience, try our [hosted platform](https://app.mem0.ai) with automatic updates and enterprise features.
+- **ChatGPT with Memory**: Personalized chat powered by Mem0 ([Live Demo](https://mem0.dev/demo))
+- **Browser Extension**: Store memories across ChatGPT, Perplexity, and Claude ([Chrome Extension](https://chromewebstore.google.com/detail/mem0))
+- **LangGraph Support**: Build a customer support bot with LangGraph + Mem0 ([Guide](https://docs.mem0.ai/integrations/langgraph))
+- **CrewAI Integration**: Tailor CrewAI outputs with Mem0 ([Example](https://docs.mem0.ai/integrations/crewai))
-## Demos
+## 📚 Documentation & Support
-- Mem0 - ChatGPT with Memory: A personalized AI chat app powered by Mem0 that remembers your preferences, facts, and memories.
+- Full docs: https://docs.mem0.ai
+- Community: [Discord](https://mem0.dev/DiG) · [Twitter](https://x.com/mem0ai)
+- Contact: founders@mem0.ai
-[Mem0 - ChatGPT with Memory](https://github.com/user-attachments/assets/cebc4f8e-bdb9-4837-868d-13c5ab7bb433)
+## ⚖️ License
-Try live [demo](https://mem0.dev/demo/)
-
-
-
-- AI Companion: Experience personalized conversations with an AI that remembers your preferences and past interactions
-
-[AI Companion Demo](https://github.com/user-attachments/assets/3fc72023-a72c-4593-8be0-3cee3ba744da)
-
-
-
-- Enhance your AI interactions by storing memories across ChatGPT, Perplexity, and Claude using our browser extension. Get [chrome extension](https://chromewebstore.google.com/detail/mem0/onihkkbipkfeijkadecaafbgagkhglop?hl=en).
-
-
-[Chrome Extension Demo](https://github.com/user-attachments/assets/ca92e40b-c453-4ff6-b25e-739fb18a8650)
-
-
-
-- Customer support bot using Langgraph and Mem0. Get the complete code from [here](https://docs.mem0.ai/integrations/langgraph)
-
-
-[Langgraph: Customer Bot](https://github.com/user-attachments/assets/ca6b482e-7f46-42c8-aa08-f88d1d93a5f4)
-
-
-
-- Use Mem0 with CrewAI to get personalized results. Full example [here](https://docs.mem0.ai/integrations/crewai)
-
-[CrewAI Demo](https://github.com/user-attachments/assets/69172a79-ccb9-4340-91f1-caa7d2dd4213)
-
-
-
-## Documentation
-
-For detailed usage instructions and API reference, visit our [documentation](https://docs.mem0.ai). You'll find:
-- Complete API reference
-- Integration guides
-- Advanced configuration options
-- Best practices and examples
-- More details about:
- - Open-source version
- - [Hosted Mem0 Platform](https://app.mem0.ai)
-
-## Support
-
-Join our community for support and discussions. If you have any questions, feel free to reach out to us using one of the following methods:
-
-- [Join our Discord](https://mem0.dev/DiG)
-- [Follow us on Twitter](https://x.com/mem0ai)
-- [Email founders](mailto:founders@mem0.ai)
-
-## License
-
-This project is licensed under the Apache 2.0 License - see the [LICENSE](LICENSE) file for details.
+Apache 2.0 — see the [LICENSE](LICENSE) file for details.
\ No newline at end of file
diff --git a/docs/_snippets/paper-release.mdx b/docs/_snippets/paper-release.mdx
new file mode 100644
index 00000000..ba9229bd
--- /dev/null
+++ b/docs/_snippets/paper-release.mdx
@@ -0,0 +1,3 @@
+
+ 📢 Announcing our research paper: Mem0 achieves 26% higher accuracy than OpenAI Memory, 91% lower latency, and 90% token savings! [Read the paper](https://mem0.ai/research) to learn how we're revolutionizing AI agent memory.
+
\ No newline at end of file
diff --git a/docs/api-reference.mdx b/docs/api-reference.mdx
index c994ec71..ef01d747 100644
--- a/docs/api-reference.mdx
+++ b/docs/api-reference.mdx
@@ -4,6 +4,8 @@ icon: "info"
iconType: "solid"
---
+
+
Mem0 provides a powerful set of APIs that allow you to integrate advanced memory management capabilities into your applications. Our APIs are designed to be intuitive, efficient, and scalable, enabling you to create, retrieve, update, and delete memories across various entities such as users, agents, apps, and runs.
## Key Features
diff --git a/docs/changelog.mdx b/docs/changelog.mdx
index b27bb623..209ac2a1 100644
--- a/docs/changelog.mdx
+++ b/docs/changelog.mdx
@@ -3,6 +3,8 @@ title: "Product Updates"
mode: "wide"
---
+
+
diff --git a/docs/components/embedders/config.mdx b/docs/components/embedders/config.mdx
index dc84c497..a8884c32 100644
--- a/docs/components/embedders/config.mdx
+++ b/docs/components/embedders/config.mdx
@@ -4,6 +4,8 @@ icon: "gear"
iconType: "solid"
---
+
+
Config in mem0 is a dictionary that specifies the settings for your embedding models. It allows you to customize the behavior and connection details of your chosen embedder.
## How to define configurations?
diff --git a/docs/components/embedders/overview.mdx b/docs/components/embedders/overview.mdx
index f98f2db4..3ae4d592 100644
--- a/docs/components/embedders/overview.mdx
+++ b/docs/components/embedders/overview.mdx
@@ -4,6 +4,8 @@ icon: "info"
iconType: "solid"
---
+
+
Mem0 offers support for various embedding models, allowing users to choose the one that best suits their needs.
## Supported Embedders
diff --git a/docs/components/llms/config.mdx b/docs/components/llms/config.mdx
index 57c8f70c..bb13cbcb 100644
--- a/docs/components/llms/config.mdx
+++ b/docs/components/llms/config.mdx
@@ -4,6 +4,8 @@ icon: "gear"
iconType: "solid"
---
+
+
## How to define configurations?
diff --git a/docs/components/llms/models/anthropic.mdx b/docs/components/llms/models/anthropic.mdx
index 84ed1011..e30988cd 100644
--- a/docs/components/llms/models/anthropic.mdx
+++ b/docs/components/llms/models/anthropic.mdx
@@ -2,6 +2,8 @@
title: Anthropic
---
+
+
To use Anthropic's models, please set the `ANTHROPIC_API_KEY`, which you can find on their [Account Settings Page](https://console.anthropic.com/account/keys).
## Usage
diff --git a/docs/components/llms/models/aws_bedrock.mdx b/docs/components/llms/models/aws_bedrock.mdx
index bd04d99b..5561e698 100644
--- a/docs/components/llms/models/aws_bedrock.mdx
+++ b/docs/components/llms/models/aws_bedrock.mdx
@@ -2,6 +2,8 @@
title: AWS Bedrock
---
+
+
### Setup
- Before using the AWS Bedrock LLM, make sure you have the appropriate model access from [Bedrock Console](https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/modelaccess).
- You will also need to authenticate the `boto3` client by using a method in the [AWS documentation](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html#configuring-credentials)
diff --git a/docs/components/llms/models/azure_openai.mdx b/docs/components/llms/models/azure_openai.mdx
index 4b333590..e1b6ddb8 100644
--- a/docs/components/llms/models/azure_openai.mdx
+++ b/docs/components/llms/models/azure_openai.mdx
@@ -2,6 +2,8 @@
title: Azure OpenAI
---
+
+
Mem0 Now Supports Azure OpenAI Models in TypeScript SDK
To use Azure OpenAI models, you have to set the `LLM_AZURE_OPENAI_API_KEY`, `LLM_AZURE_ENDPOINT`, `LLM_AZURE_DEPLOYMENT` and `LLM_AZURE_API_VERSION` environment variables. You can obtain the Azure API key from the [Azure](https://azure.microsoft.com/).
diff --git a/docs/components/llms/models/deepseek.mdx b/docs/components/llms/models/deepseek.mdx
index af1783a1..56fc7f42 100644
--- a/docs/components/llms/models/deepseek.mdx
+++ b/docs/components/llms/models/deepseek.mdx
@@ -2,6 +2,8 @@
title: DeepSeek
---
+
+
To use DeepSeek LLM models, you have to set the `DEEPSEEK_API_KEY` environment variable. You can also optionally set `DEEPSEEK_API_BASE` if you need to use a different API endpoint (defaults to "https://api.deepseek.com").
## Usage
diff --git a/docs/components/llms/models/gemini.mdx b/docs/components/llms/models/gemini.mdx
index 4c166ea4..7a502ad5 100644
--- a/docs/components/llms/models/gemini.mdx
+++ b/docs/components/llms/models/gemini.mdx
@@ -2,6 +2,8 @@
title: Gemini
---
+
+
To use the Gemini model, you have to set the `GEMINI_API_KEY` environment variable. You can obtain the Gemini API key from [Google AI Studio](https://aistudio.google.com/app/apikey).
## Usage
diff --git a/docs/components/llms/models/google_AI.mdx b/docs/components/llms/models/google_AI.mdx
index 4e08ea74..a3ed2393 100644
--- a/docs/components/llms/models/google_AI.mdx
+++ b/docs/components/llms/models/google_AI.mdx
@@ -2,6 +2,8 @@
title: Google AI
---
+
+
To use a Google AI model, you have to set the `GOOGLE_API_KEY` environment variable. You can obtain the Google API key from [Google Maker Suite](https://makersuite.google.com/app/apikey).
## Usage
diff --git a/docs/components/llms/models/groq.mdx b/docs/components/llms/models/groq.mdx
index d8f0727c..556ac763 100644
--- a/docs/components/llms/models/groq.mdx
+++ b/docs/components/llms/models/groq.mdx
@@ -2,6 +2,8 @@
title: Groq
---
+
+
[Groq](https://groq.com/) is the creator of the world's first Language Processing Unit (LPU), providing exceptional speed performance for AI workloads running on their LPU Inference Engine.
In order to use LLMs from Groq, go to their [platform](https://console.groq.com/keys) and get the API key. Set the API key as `GROQ_API_KEY` environment variable to use the model as given below in the example.
diff --git a/docs/components/llms/models/langchain.mdx b/docs/components/llms/models/langchain.mdx
index 4113bc05..8f5744ac 100644
--- a/docs/components/llms/models/langchain.mdx
+++ b/docs/components/llms/models/langchain.mdx
@@ -2,6 +2,8 @@
title: LangChain
---
+
+
Mem0 supports LangChain as a provider to access a wide range of LLM models. LangChain is a framework for developing applications powered by language models, making it easy to integrate various LLM providers through a consistent interface.
For a complete list of available chat models supported by LangChain, refer to the [LangChain Chat Models documentation](https://python.langchain.com/docs/integrations/chat).
diff --git a/docs/components/llms/models/litellm.mdx b/docs/components/llms/models/litellm.mdx
index d66669f8..4d6caa6f 100644
--- a/docs/components/llms/models/litellm.mdx
+++ b/docs/components/llms/models/litellm.mdx
@@ -1,3 +1,5 @@
+
+
[Litellm](https://litellm.vercel.app/docs/) is compatible with over 100 large language models (LLMs), all using a standardized input/output format. You can explore the [available models](https://litellm.vercel.app/docs/providers) to use with Litellm. Ensure you set the `API_KEY` for the model you choose to use.
## Usage
diff --git a/docs/components/llms/models/lmstudio.mdx b/docs/components/llms/models/lmstudio.mdx
index f88490db..9c519561 100644
--- a/docs/components/llms/models/lmstudio.mdx
+++ b/docs/components/llms/models/lmstudio.mdx
@@ -2,6 +2,8 @@
title: LM Studio
---
+
+
To use LM Studio with Mem0, you'll need to have LM Studio running locally with its server enabled. LM Studio provides a way to run local LLMs with an OpenAI-compatible API.
## Usage
diff --git a/docs/components/llms/models/mistral_AI.mdx b/docs/components/llms/models/mistral_AI.mdx
index 632d4877..855ccaef 100644
--- a/docs/components/llms/models/mistral_AI.mdx
+++ b/docs/components/llms/models/mistral_AI.mdx
@@ -2,6 +2,8 @@
title: Mistral AI
---
+
+
To use Mistral's models, please obtain the Mistral AI API key from their [console](https://console.mistral.ai/). Set the `MISTRAL_API_KEY` environment variable to use the model as shown in the example below.
## Usage
diff --git a/docs/components/llms/models/ollama.mdx b/docs/components/llms/models/ollama.mdx
index 757fd2cc..8eb14e3f 100644
--- a/docs/components/llms/models/ollama.mdx
+++ b/docs/components/llms/models/ollama.mdx
@@ -1,3 +1,5 @@
+
+
You can use LLMs from Ollama to run Mem0 locally. These [models](https://ollama.com/search?c=tools) support tool calling.
## Usage
diff --git a/docs/components/llms/models/openai.mdx b/docs/components/llms/models/openai.mdx
index 0c7ecfd5..44e4e3d3 100644
--- a/docs/components/llms/models/openai.mdx
+++ b/docs/components/llms/models/openai.mdx
@@ -2,6 +2,8 @@
title: OpenAI
---
+
+
To use OpenAI LLM models, you have to set the `OPENAI_API_KEY` environment variable. You can obtain the OpenAI API key from the [OpenAI Platform](https://platform.openai.com/account/api-keys).
## Usage
diff --git a/docs/components/llms/models/together.mdx b/docs/components/llms/models/together.mdx
index 63182918..2cbca70d 100644
--- a/docs/components/llms/models/together.mdx
+++ b/docs/components/llms/models/together.mdx
@@ -1,3 +1,5 @@
+
+
To use TogetherAI LLM models, you have to set the `TOGETHER_API_KEY` environment variable. You can obtain the TogetherAI API key from their [Account settings page](https://api.together.xyz/settings/api-keys).
## Usage
diff --git a/docs/components/llms/models/xAI.mdx b/docs/components/llms/models/xAI.mdx
index 39b159ca..6b267e25 100644
--- a/docs/components/llms/models/xAI.mdx
+++ b/docs/components/llms/models/xAI.mdx
@@ -2,6 +2,8 @@
title: xAI
---
+
+
[xAI](https://x.ai/) is a new AI company founded by Elon Musk that develops large language models, including Grok. Grok is trained on real-time data from X (formerly Twitter) and aims to provide accurate, up-to-date responses with a touch of wit and humor.
In order to use LLMs from xAI, go to their [platform](https://console.x.ai) and get the API key. Set the API key as `XAI_API_KEY` environment variable to use the model as given below in the example.
diff --git a/docs/components/llms/overview.mdx b/docs/components/llms/overview.mdx
index a719bc72..e3114bb7 100644
--- a/docs/components/llms/overview.mdx
+++ b/docs/components/llms/overview.mdx
@@ -4,6 +4,8 @@ icon: "info"
iconType: "solid"
---
+
+
Mem0 includes built-in support for various popular large language models. Memory can utilize the LLM provided by the user, ensuring efficient use for specific needs.
## Usage
diff --git a/docs/components/vectordbs/config.mdx b/docs/components/vectordbs/config.mdx
index a36e5579..abe9647e 100644
--- a/docs/components/vectordbs/config.mdx
+++ b/docs/components/vectordbs/config.mdx
@@ -4,6 +4,8 @@ icon: "gear"
iconType: "solid"
---
+
+
## How to define configurations?
The `config` is defined as an object with two main keys:
diff --git a/docs/components/vectordbs/overview.mdx b/docs/components/vectordbs/overview.mdx
index 5d406eb6..bf034882 100644
--- a/docs/components/vectordbs/overview.mdx
+++ b/docs/components/vectordbs/overview.mdx
@@ -4,6 +4,8 @@ icon: "info"
iconType: "solid"
---
+
+
Mem0 includes built-in support for various popular databases. Memory can utilize the database provided by the user, ensuring efficient use for specific needs.
## Supported Vector Databases
diff --git a/docs/contributing/development.mdx b/docs/contributing/development.mdx
index e65b2070..be8aaa62 100644
--- a/docs/contributing/development.mdx
+++ b/docs/contributing/development.mdx
@@ -3,6 +3,8 @@ title: Development
icon: "code"
---
+
+
# Development Contributions
We strive to make contributions **easy, collaborative, and enjoyable**. Follow the steps below to ensure a smooth contribution process.
diff --git a/docs/contributing/documentation.mdx b/docs/contributing/documentation.mdx
index 33b445de..e3dcb19d 100644
--- a/docs/contributing/documentation.mdx
+++ b/docs/contributing/documentation.mdx
@@ -3,6 +3,8 @@ title: Documentation
icon: "book"
---
+
+
# Documentation Contributions
## 📌 Prerequisites
diff --git a/docs/core-concepts/memory-operations.mdx b/docs/core-concepts/memory-operations.mdx
index 5fdffd21..804c3b99 100644
--- a/docs/core-concepts/memory-operations.mdx
+++ b/docs/core-concepts/memory-operations.mdx
@@ -5,6 +5,8 @@ icon: "gear"
iconType: "solid"
---
+
+
Mem0 provides two core operations for managing memories in AI applications: adding new memories and searching existing ones. This guide covers how these operations work and how to use them effectively in your application.
diff --git a/docs/core-concepts/memory-types.mdx b/docs/core-concepts/memory-types.mdx
index 73dfa860..fa63a0e5 100644
--- a/docs/core-concepts/memory-types.mdx
+++ b/docs/core-concepts/memory-types.mdx
@@ -4,6 +4,9 @@ description: Understanding different types of memory in AI Applications
icon: "memory"
iconType: "solid"
---
+
+
+
To build useful AI applications, we need to understand how different memory systems work together. This guide explores the fundamental types of memory in AI systems and shows how Mem0 implements these concepts.
## Why Memory Matters
diff --git a/docs/examples.mdx b/docs/examples.mdx
index ce279b08..ad26577a 100644
--- a/docs/examples.mdx
+++ b/docs/examples.mdx
@@ -3,6 +3,8 @@ title: Overview
description: How to use mem0 in your existing applications?
---
+
+
With Mem0, you can create stateful LLM-based applications such as chatbots, virtual assistants, or AI agents. Mem0 enhances your applications by providing a memory layer that makes responses:
diff --git a/docs/examples/ai_companion.mdx b/docs/examples/ai_companion.mdx
index 896985e0..55aecc7b 100644
--- a/docs/examples/ai_companion.mdx
+++ b/docs/examples/ai_companion.mdx
@@ -2,6 +2,8 @@
title: AI Companion
---
+
+
You can create a personalised AI Companion using Mem0. This guide will walk you through the necessary steps and provide the complete code to get you started.
## Overview
diff --git a/docs/examples/ai_companion_js.mdx b/docs/examples/ai_companion_js.mdx
index d170d12b..59924697 100644
--- a/docs/examples/ai_companion_js.mdx
+++ b/docs/examples/ai_companion_js.mdx
@@ -2,6 +2,8 @@
title: AI Companion in Node.js
---
+
+
You can create a personalised AI Companion using Mem0. This guide will walk you through the necessary steps and provide the complete code to get you started.
## Overview
diff --git a/docs/examples/chrome-extension.mdx b/docs/examples/chrome-extension.mdx
index a9ed8e3d..3177b465 100644
--- a/docs/examples/chrome-extension.mdx
+++ b/docs/examples/chrome-extension.mdx
@@ -1,5 +1,7 @@
# Mem0 Chrome Extension
+
+
Enhance your AI interactions with **Mem0**, a Chrome extension that introduces a universal memory layer across platforms like `ChatGPT`, `Claude`, and `Perplexity`. Mem0 ensures seamless context sharing, making your AI experiences more personalized and efficient.
diff --git a/docs/examples/customer-support-agent.mdx b/docs/examples/customer-support-agent.mdx
index 171ddded..1d234c3d 100644
--- a/docs/examples/customer-support-agent.mdx
+++ b/docs/examples/customer-support-agent.mdx
@@ -2,6 +2,8 @@
title: Customer Support AI Agent
---
+
+
You can create a personalized Customer Support AI Agent using Mem0. This guide will walk you through the necessary steps and provide the complete code to get you started.
## Overview
diff --git a/docs/examples/document-writing.mdx b/docs/examples/document-writing.mdx
index 0bd53b25..81361459 100644
--- a/docs/examples/document-writing.mdx
+++ b/docs/examples/document-writing.mdx
@@ -1,6 +1,7 @@
---
title: Document Editing with Mem0
---
+
This guide demonstrates how to leverage **Mem0** to edit documents efficiently, ensuring they align with your unique writing style and preferences.
diff --git a/docs/examples/email_processing.mdx b/docs/examples/email_processing.mdx
index 572d1832..c3afa2ab 100644
--- a/docs/examples/email_processing.mdx
+++ b/docs/examples/email_processing.mdx
@@ -2,6 +2,8 @@
title: Email Processing with Mem0
---
+
+
This guide demonstrates how to build an intelligent email processing system using Mem0's memory capabilities. You'll learn how to store, categorize, retrieve, and analyze emails to create a smart email management solution.
## Overview
diff --git a/docs/examples/llama-index-mem0.mdx b/docs/examples/llama-index-mem0.mdx
index 6c68abf9..73743292 100644
--- a/docs/examples/llama-index-mem0.mdx
+++ b/docs/examples/llama-index-mem0.mdx
@@ -1,6 +1,8 @@
---
title: LlamaIndex ReAct Agent
---
+
+
Create a ReAct Agent with LlamaIndex which uses Mem0 as the memory store.
### Overview
diff --git a/docs/examples/mem0-agentic-tool.mdx b/docs/examples/mem0-agentic-tool.mdx
index d146d2ec..c6dafd9a 100644
--- a/docs/examples/mem0-agentic-tool.mdx
+++ b/docs/examples/mem0-agentic-tool.mdx
@@ -2,6 +2,8 @@
title: Mem0 as an Agentic Tool
---
+
+
Integrate Mem0's memory capabilities with OpenAI's Agents SDK to create AI agents with persistent memory.
You can create agents that remember past conversations and use that context to provide better responses.
diff --git a/docs/examples/mem0-demo.mdx b/docs/examples/mem0-demo.mdx
index e5d2a379..5c0987ca 100644
--- a/docs/examples/mem0-demo.mdx
+++ b/docs/examples/mem0-demo.mdx
@@ -2,6 +2,9 @@
title: Mem0 Demo
---
+
+
+
You can create a personalized AI Companion using Mem0. This guide will walk you through the necessary steps and provide the complete setup instructions to get you started.
+
In this example you'll learn how to use Mem0 to add long-term memory capabilities to [Mastra's agent](https://mastra.ai/) via tool use.
This memory integration can work alongside Mastra's [agent memory features](https://mastra.ai/docs/agents/01-agent-memory).
diff --git a/docs/examples/mem0-openai-voice-demo.mdx b/docs/examples/mem0-openai-voice-demo.mdx
index 42013d45..3e64196d 100644
--- a/docs/examples/mem0-openai-voice-demo.mdx
+++ b/docs/examples/mem0-openai-voice-demo.mdx
@@ -3,6 +3,8 @@ title: 'Mem0 with OpenAI Agents SDK for Voice'
description: 'Integrate memory capabilities into your voice agents using Mem0 and OpenAI Agents SDK'
---
+
+
# Building Voice Agents with Memory using Mem0 and OpenAI Agents SDK
This guide demonstrates how to combine OpenAI's Agents SDK for voice applications with Mem0's memory capabilities to create a voice assistant that remembers user preferences and past interactions.
diff --git a/docs/examples/mem0-with-ollama.mdx b/docs/examples/mem0-with-ollama.mdx
index 664e00b6..84a70090 100644
--- a/docs/examples/mem0-with-ollama.mdx
+++ b/docs/examples/mem0-with-ollama.mdx
@@ -2,6 +2,8 @@
title: Mem0 with Ollama
---
+
+
## Running Mem0 Locally with Ollama
Mem0 can be utilized entirely locally by leveraging Ollama for both the embedding model and the language model (LLM). This guide will walk you through the necessary steps and provide the complete code to get you started.
diff --git a/docs/examples/multimodal-demo.mdx b/docs/examples/multimodal-demo.mdx
index 7c66ed9a..054d5197 100644
--- a/docs/examples/multimodal-demo.mdx
+++ b/docs/examples/multimodal-demo.mdx
@@ -2,6 +2,8 @@
title: Multimodal Demo with Mem0
---
+
+
Enhance your AI interactions with **Mem0**'s multimodal capabilities. Mem0 now supports image understanding, allowing for richer context and more natural interactions across supported AI platforms.
> 🎉 Experience the power of multimodal AI! Test out Mem0's image understanding capabilities at [multimodal-demo.mem0.ai](https://multimodal-demo.mem0.ai)
diff --git a/docs/examples/openai-inbuilt-tools.mdx b/docs/examples/openai-inbuilt-tools.mdx
index fb867f81..c5c0d0c1 100644
--- a/docs/examples/openai-inbuilt-tools.mdx
+++ b/docs/examples/openai-inbuilt-tools.mdx
@@ -2,6 +2,8 @@
title: OpenAI Inbuilt Tools
---
+
+
Integrate Mem0’s memory capabilities with OpenAI’s Inbuilt Tools to create AI agents with persistent memory.
## Getting Started
diff --git a/docs/examples/personal-ai-tutor.mdx b/docs/examples/personal-ai-tutor.mdx
index 220577aa..c62bf4a6 100644
--- a/docs/examples/personal-ai-tutor.mdx
+++ b/docs/examples/personal-ai-tutor.mdx
@@ -2,6 +2,8 @@
title: Personalized AI Tutor
---
+
+
You can create a personalized AI Tutor using Mem0. This guide will walk you through the necessary steps and provide the complete code to get you started.
## Overview
diff --git a/docs/examples/personal-travel-assistant.mdx b/docs/examples/personal-travel-assistant.mdx
index 8894f143..3be2c7db 100644
--- a/docs/examples/personal-travel-assistant.mdx
+++ b/docs/examples/personal-travel-assistant.mdx
@@ -1,6 +1,9 @@
---
title: Personal AI Travel Assistant
---
+
+
+
Create a personalized AI Travel Assistant using Mem0. This guide provides step-by-step instructions and the complete code to get you started.
## Overview
diff --git a/docs/examples/personalized-deep-research.mdx b/docs/examples/personalized-deep-research.mdx
index 763b2568..89428a87 100644
--- a/docs/examples/personalized-deep-research.mdx
+++ b/docs/examples/personalized-deep-research.mdx
@@ -2,6 +2,8 @@
title: Personalized Deep Research
---
+
+
Deep Research is an intelligent agent that synthesizes large amounts of online data and completes complex research tasks, customized to your unique preferences and insights. Built on Mem0's technology, it enhances AI-driven online exploration with personalized memories.
## Overview
diff --git a/docs/examples/youtube-assistant.mdx b/docs/examples/youtube-assistant.mdx
index ffea6fd6..ce382394 100644
--- a/docs/examples/youtube-assistant.mdx
+++ b/docs/examples/youtube-assistant.mdx
@@ -2,6 +2,8 @@
title: YouTube Assistant Extension
---
+
+
Enhance your YouTube experience with Mem0's **YouTube Assistant**, a Chrome extension that brings AI-powered chat directly to your YouTube videos. Get instant, personalized answers about video content while leveraging your own knowledge and memories - all without leaving the page.
## Features
diff --git a/docs/faqs.mdx b/docs/faqs.mdx
index 0ed30d0d..b1d6674a 100644
--- a/docs/faqs.mdx
+++ b/docs/faqs.mdx
@@ -4,6 +4,7 @@ icon: "question"
iconType: "solid"
---
+
diff --git a/docs/features.mdx b/docs/features.mdx
index 2aa0e3a8..491bc2b4 100644
--- a/docs/features.mdx
+++ b/docs/features.mdx
@@ -4,6 +4,8 @@ icon: "wrench"
iconType: "solid"
---
+
+
## Core features
- **User, Session, and AI Agent Memory**: Retains information across sessions and interactions for users and AI agents, ensuring continuity and context.
diff --git a/docs/features/advanced-retrieval.mdx b/docs/features/advanced-retrieval.mdx
index 2d6efc7b..9a9f295b 100644
--- a/docs/features/advanced-retrieval.mdx
+++ b/docs/features/advanced-retrieval.mdx
@@ -4,6 +4,8 @@ icon: "magnifying-glass"
iconType: "solid"
---
+
+
Mem0's **Advanced Retrieval** feature delivers superior search results by leveraging state-of-the-art search algorithms. Beyond the default search functionality, Mem0 offers the following advanced retrieval modes:
1. **Keyword Search**
diff --git a/docs/features/async-client.mdx b/docs/features/async-client.mdx
index 1ee048fa..0082edbd 100644
--- a/docs/features/async-client.mdx
+++ b/docs/features/async-client.mdx
@@ -5,6 +5,7 @@ icon: "bolt"
iconType: "solid"
---
+
The `AsyncMemoryClient` is an asynchronous client for interacting with the Mem0 API. It provides similar functionality to the synchronous `MemoryClient` but allows for non-blocking operations, which can be beneficial in applications that require high concurrency.
## Initialization
diff --git a/docs/features/contextual-add.mdx b/docs/features/contextual-add.mdx
index 846e2232..3dc35dc5 100644
--- a/docs/features/contextual-add.mdx
+++ b/docs/features/contextual-add.mdx
@@ -4,6 +4,8 @@ icon: "square-plus"
iconType: "solid"
---
+
+
Mem0 now supports a contextual add version (v2). To use it, set `version="v2"` during the add call. The default version is v1, which is now deprecated; we recommend migrating to `v2` for new applications.
## Key Differences Between v1 and v2
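For illustration, a hedged example of the `version="v2"` add call described above, using the hosted `MemoryClient`; the API key and messages are placeholders:

```python
from mem0 import MemoryClient

client = MemoryClient(api_key="your-api-key")  # placeholder credentials

messages = [
    {"role": "user", "content": "I'm vegetarian and I live in Berlin."},
    {"role": "assistant", "content": "Noted! I'll keep that in mind."},
]

# version="v2" opts into the contextual add flow; v1 stays the default but is deprecated.
client.add(messages, user_id="alex", version="v2")
```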
diff --git a/docs/features/custom-categories.mdx b/docs/features/custom-categories.mdx
index faaca07b..4679c133 100644
--- a/docs/features/custom-categories.mdx
+++ b/docs/features/custom-categories.mdx
@@ -5,6 +5,8 @@ icon: "tags"
iconType: "solid"
---
+
+
## How to set custom categories?
You can now create custom categories tailored to your specific needs, instead of using the default categories such as travel, sports, music, and more (see [default categories](#default-categories) below). **When custom categories are provided, they will override the default categories.**
diff --git a/docs/features/custom-fact-extraction-prompt.mdx b/docs/features/custom-fact-extraction-prompt.mdx
index 1bb47139..fa9a6684 100644
--- a/docs/features/custom-fact-extraction-prompt.mdx
+++ b/docs/features/custom-fact-extraction-prompt.mdx
@@ -5,6 +5,8 @@ icon: "pencil"
iconType: "solid"
---
+
+
## Introduction to Custom Fact Extraction Prompt
Custom fact extraction prompt allow you to tailor the behavior of your Mem0 instance to specific use cases or domains.
diff --git a/docs/features/custom-instructions.mdx b/docs/features/custom-instructions.mdx
index f9195fcc..ab8ec555 100644
--- a/docs/features/custom-instructions.mdx
+++ b/docs/features/custom-instructions.mdx
@@ -5,6 +5,8 @@ icon: "pencil"
iconType: "solid"
---
+
+
## Introduction to Custom Instructions
Custom instructions allow you to define specific guidelines for your project. This feature helps ensure consistency and provides clear direction for handling project-specific requirements.
diff --git a/docs/features/custom-update-memory-prompt.mdx b/docs/features/custom-update-memory-prompt.mdx
index 3ab99555..0c17ddc5 100644
--- a/docs/features/custom-update-memory-prompt.mdx
+++ b/docs/features/custom-update-memory-prompt.mdx
@@ -3,9 +3,12 @@ title: Custom Update Memory Prompt
icon: "pencil"
iconType: "solid"
---
+
+
+
Update memory prompt is a prompt used to determine the action to be performed on the memory.
By customizing this prompt, you can control how the memory is updated.
-
+
## Introduction
Mem0 memory system compares the newly retrieved facts with the existing memory and determines the action to be performed on the memory.
diff --git a/docs/features/direct-import.mdx b/docs/features/direct-import.mdx
index a1619ece..3946ea7a 100644
--- a/docs/features/direct-import.mdx
+++ b/docs/features/direct-import.mdx
@@ -5,6 +5,8 @@ icon: "arrow-right"
iconType: "solid"
---
+
+
## How to use Direct Import?
The Direct Import feature allows users to skip the memory deduction phase and directly input pre-defined memories into the system for storage and retrieval.
To enable this feature, you need to set the `infer` parameter to `False` in the `add` method.
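A hedged example of the `infer=False` usage mentioned above; the client setup and content are placeholders:

```python
from mem0 import MemoryClient

client = MemoryClient(api_key="your-api-key")  # placeholder credentials

# With infer=False the text is stored as-is, skipping the memory deduction phase.
messages = [{"role": "user", "content": "Alice prefers window seats on long flights."}]
client.add(messages, user_id="alice", infer=False)
```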
diff --git a/docs/features/expiration-date.mdx b/docs/features/expiration-date.mdx
index 64b75734..cd39e505 100644
--- a/docs/features/expiration-date.mdx
+++ b/docs/features/expiration-date.mdx
@@ -5,6 +5,8 @@ icon: "clock"
iconType: "solid"
---
+
+
## Benefits of Memory Expiration
Setting expiration dates for memories offers several advantages:
diff --git a/docs/features/feedback-mechanism.mdx b/docs/features/feedback-mechanism.mdx
index 1318b65c..55a3ae9c 100644
--- a/docs/features/feedback-mechanism.mdx
+++ b/docs/features/feedback-mechanism.mdx
@@ -4,6 +4,8 @@ icon: "thumbs-up"
iconType: "solid"
---
+
+
Mem0's **Feedback Mechanism** allows you to provide feedback on the memories generated by your application. This feedback is used to improve the accuracy of the memories and the search results.
## How it works
diff --git a/docs/features/graph-memory.mdx b/docs/features/graph-memory.mdx
index e4f37c3c..a8c057bc 100644
--- a/docs/features/graph-memory.mdx
+++ b/docs/features/graph-memory.mdx
@@ -5,6 +5,8 @@ iconType: "solid"
description: "Enable graph-based memory retrieval for more contextually relevant results"
---
+
+
## Overview
Graph Memory enhances memory pipeline by creating relationships between entities in your data. It builds a network of interconnected information for more contextually relevant search results.
diff --git a/docs/features/memory-export.mdx b/docs/features/memory-export.mdx
index e30b963e..e4d9a27c 100644
--- a/docs/features/memory-export.mdx
+++ b/docs/features/memory-export.mdx
@@ -5,6 +5,8 @@ icon: "file-export"
iconType: "solid"
---
+
+
## Overview
The Memory Export feature allows you to create structured exports of memories using customizable Pydantic schemas. This process enables you to transform your stored memories into specific data formats that match your needs. You can apply various filters to narrow down which memories to export and define exactly how the data should be structured.
diff --git a/docs/features/multimodal-support.mdx b/docs/features/multimodal-support.mdx
index 6754b357..6f48f15a 100644
--- a/docs/features/multimodal-support.mdx
+++ b/docs/features/multimodal-support.mdx
@@ -5,6 +5,8 @@ icon: "image"
iconType: "solid"
---
+
+
Mem0 extends its capabilities beyond text by supporting multimodal data, including images and documents. With this feature, users can seamlessly integrate visual and document content into their interactions—allowing Mem0 to extract relevant information from various media types and enrich the memory system.
## How It Works
diff --git a/docs/features/openai_compatibility.mdx b/docs/features/openai_compatibility.mdx
index 6cd52ec3..7afaa27f 100644
--- a/docs/features/openai_compatibility.mdx
+++ b/docs/features/openai_compatibility.mdx
@@ -4,6 +4,8 @@ icon: "code"
iconType: "solid"
---
+
+
Mem0 can be easily integrated into chat applications to enhance conversational agents with structured memory. Mem0's APIs are designed to be compatible with OpenAI's, with the goal of making it easy to leverage Mem0 in applications you may have already built.
If you have a `Mem0 API key`, you can use it to initialize the client. Alternatively, you can initialize Mem0 without an API key if you're using it locally.
diff --git a/docs/features/platform-overview.mdx b/docs/features/platform-overview.mdx
index 8400adbd..9e28b313 100644
--- a/docs/features/platform-overview.mdx
+++ b/docs/features/platform-overview.mdx
@@ -4,6 +4,8 @@ icon: "info"
iconType: "solid"
---
+
+
Learn about the key features and capabilities that make Mem0 a powerful platform for memory management and retrieval.
## Core Features
diff --git a/docs/features/selective-memory.mdx b/docs/features/selective-memory.mdx
index 6b731e6d..bdb65b64 100644
--- a/docs/features/selective-memory.mdx
+++ b/docs/features/selective-memory.mdx
@@ -5,6 +5,8 @@ icon: "filter"
iconType: "solid"
---
+
+
## Benefits of Memory Customization
Memory customization offers several key benefits:
diff --git a/docs/features/timestamp.mdx b/docs/features/timestamp.mdx
index 8dde36f3..cd95256b 100644
--- a/docs/features/timestamp.mdx
+++ b/docs/features/timestamp.mdx
@@ -5,6 +5,8 @@ icon: "clock"
iconType: "solid"
---
+
+
## Overview
The Memory Timestamps feature allows you to specify when a memory was created, regardless of when it's actually added to the system. This powerful capability enables you to:
diff --git a/docs/features/webhooks.mdx b/docs/features/webhooks.mdx
index 8994d490..6988e643 100644
--- a/docs/features/webhooks.mdx
+++ b/docs/features/webhooks.mdx
@@ -5,6 +5,8 @@ icon: "webhook"
iconType: "solid"
---
+
+
## Overview
Webhooks enable real-time notifications for memory events in your Mem0 project. Webhooks are configured at the project level, meaning each webhook is tied to a specific project and receives events solely from that project. You can configure webhooks to send HTTP POST requests to your specified URLs whenever memories are created, updated, or deleted.
diff --git a/docs/integrations.mdx b/docs/integrations.mdx
index 8893a748..02206ac9 100644
--- a/docs/integrations.mdx
+++ b/docs/integrations.mdx
@@ -3,6 +3,8 @@ title: Overview
description: How to integrate Mem0 into other frameworks
---
+
+
Mem0 seamlessly integrates with popular AI frameworks and tools to enhance your LLM-based applications with persistent memory capabilities. By integrating Mem0, your applications benefit from:
- Enhanced context management across multiple frameworks
diff --git a/docs/integrations/agno.mdx b/docs/integrations/agno.mdx
index 805dd081..ddc992f9 100644
--- a/docs/integrations/agno.mdx
+++ b/docs/integrations/agno.mdx
@@ -1,6 +1,7 @@
---
title: Agno
---
+
Integrate [**Mem0**](https://github.com/mem0ai/mem0) with [Agno](https://github.com/agno-agi/agno), a Python framework for building autonomous agents. This integration enables Agno agents to access persistent memory across conversations, enhancing context retention and personalization.
diff --git a/docs/integrations/autogen.mdx b/docs/integrations/autogen.mdx
index 53178205..da85955b 100644
--- a/docs/integrations/autogen.mdx
+++ b/docs/integrations/autogen.mdx
@@ -1,5 +1,7 @@
Build conversational AI agents with memory capabilities. This integration combines AutoGen for creating AI agents with Mem0 for memory management, enabling context-aware and personalized interactions.
+
+
## Overview
In this guide, we'll explore an example of creating a conversational AI system with memory:
diff --git a/docs/integrations/crewai.mdx b/docs/integrations/crewai.mdx
index e2b90444..e3a6310a 100644
--- a/docs/integrations/crewai.mdx
+++ b/docs/integrations/crewai.mdx
@@ -2,6 +2,8 @@
title: CrewAI
---
+
+
Build an AI system that combines CrewAI's agent-based architecture with Mem0's memory capabilities. This integration enables persistent memory across agent interactions and personalized task execution based on user history.
## Overview
diff --git a/docs/integrations/dify.mdx b/docs/integrations/dify.mdx
index e08b367b..734113bb 100644
--- a/docs/integrations/dify.mdx
+++ b/docs/integrations/dify.mdx
@@ -2,6 +2,8 @@
title: Dify
---
+
+
# Integrating Mem0 with Dify AI
Mem0 brings a robust memory layer to Dify AI, empowering your AI agents with persistent conversation storage and retrieval capabilities. With Mem0, your Dify applications gain the ability to recall past interactions and maintain context, ensuring more natural and insightful conversations.
diff --git a/docs/integrations/elevenlabs.mdx b/docs/integrations/elevenlabs.mdx
index 6d38c734..a265d681 100644
--- a/docs/integrations/elevenlabs.mdx
+++ b/docs/integrations/elevenlabs.mdx
@@ -2,6 +2,8 @@
title: ElevenLabs
---
+
+
Create voice-based conversational AI agents with memory capabilities by integrating ElevenLabs and Mem0. This integration enables persistent, context-aware voice interactions that remember past conversations.
## Overview
diff --git a/docs/integrations/flowise.mdx b/docs/integrations/flowise.mdx
index 9f1d747d..7e0d1906 100644
--- a/docs/integrations/flowise.mdx
+++ b/docs/integrations/flowise.mdx
@@ -2,6 +2,8 @@
title: Flowise
---
+
+
The [**Mem0 Memory**](https://github.com/mem0ai/mem0) integration with [Flowise](https://github.com/FlowiseAI/Flowise) enables persistent memory capabilities for your AI chatflows. [Flowise](https://flowiseai.com/) is an open-source low-code tool for developers to build customized LLM orchestration flows & AI agents using a drag & drop interface.
## Overview
diff --git a/docs/integrations/keywords.mdx b/docs/integrations/keywords.mdx
index 6c812d6f..e9518383 100644
--- a/docs/integrations/keywords.mdx
+++ b/docs/integrations/keywords.mdx
@@ -2,6 +2,8 @@
title: Keywords AI
---
+
+
Build AI applications with persistent memory and comprehensive LLM observability by integrating Mem0 with Keywords AI.
## Overview
diff --git a/docs/integrations/langchain-tools.mdx b/docs/integrations/langchain-tools.mdx
index 62b3b0d7..f0b72444 100644
--- a/docs/integrations/langchain-tools.mdx
+++ b/docs/integrations/langchain-tools.mdx
@@ -3,6 +3,8 @@ title: Langchain Tools
description: 'Integrate Mem0 with LangChain tools to enable AI agents to store, search, and manage memories through structured interfaces'
---
+
+
## Overview
Mem0 provides a suite of tools for storing, searching, and retrieving memories, enabling agents to maintain context and learn from past interactions. The tools are built as Langchain tools, making them easily integrable with any AI agent implementation.
diff --git a/docs/integrations/langchain.mdx b/docs/integrations/langchain.mdx
index 1d664c1f..928ee06f 100644
--- a/docs/integrations/langchain.mdx
+++ b/docs/integrations/langchain.mdx
@@ -2,6 +2,8 @@
title: Langchain
---
+
+
Build a personalized Travel Agent AI using LangChain for conversation flow and Mem0 for memory retention. This integration enables context-aware and efficient travel planning experiences.
## Overview
diff --git a/docs/integrations/langgraph.mdx b/docs/integrations/langgraph.mdx
index cb0a2f58..158e98d5 100644
--- a/docs/integrations/langgraph.mdx
+++ b/docs/integrations/langgraph.mdx
@@ -2,6 +2,8 @@
title: LangGraph
---
+
+
Build a personalized Customer Support AI Agent using LangGraph for conversation flow and Mem0 for memory retention. This integration enables context-aware and efficient support experiences.
## Overview
diff --git a/docs/integrations/livekit.mdx b/docs/integrations/livekit.mdx
index 66bd3126..fd2d7b4c 100644
--- a/docs/integrations/livekit.mdx
+++ b/docs/integrations/livekit.mdx
@@ -2,6 +2,8 @@
title: Livekit
---
+
+
This guide demonstrates how to create a memory-enabled voice assistant using LiveKit, Deepgram, OpenAI, and Mem0, focusing on creating an intelligent, context-aware travel planning agent.
## Prerequisites
diff --git a/docs/integrations/llama-index.mdx b/docs/integrations/llama-index.mdx
index ac0551f0..1e3aabdf 100644
--- a/docs/integrations/llama-index.mdx
+++ b/docs/integrations/llama-index.mdx
@@ -2,6 +2,8 @@
title: LlamaIndex
---
+
+
LlamaIndex supports Mem0 as a [memory store](https://llamahub.ai/l/memory/llama-index-memory-mem0). In this guide, we'll show you how to use it.
diff --git a/docs/integrations/mcp-server.mdx b/docs/integrations/mcp-server.mdx
index d4bfbef7..2c5b746a 100644
--- a/docs/integrations/mcp-server.mdx
+++ b/docs/integrations/mcp-server.mdx
@@ -2,6 +2,8 @@
title: MCP Server
---
+
+
## Integrating mem0 as an MCP Server in Cursor
[mem0](https://github.com/mem0ai/mem0-mcp) is a powerful tool designed to enhance AI-driven workflows, particularly in code generation and contextual memory. In this guide, we'll walk through integrating mem0 as an **MCP (Model Context Protocol) server** within [Cursor](https://cursor.sh/), an AI-powered coding editor.
diff --git a/docs/integrations/multion.mdx b/docs/integrations/multion.mdx
index 157116e9..df646d3f 100644
--- a/docs/integrations/multion.mdx
+++ b/docs/integrations/multion.mdx
@@ -2,6 +2,8 @@
title: MultiOn
---
+
+
Build a personal browser agent that remembers user preferences and automates web tasks. It integrates Mem0 for memory management with MultiOn for executing browser actions, enabling personalized and efficient web interactions.
## Overview
diff --git a/docs/integrations/pipecat.mdx b/docs/integrations/pipecat.mdx
index 231451b0..964181be 100644
--- a/docs/integrations/pipecat.mdx
+++ b/docs/integrations/pipecat.mdx
@@ -3,6 +3,8 @@ title: 'Pipecat'
description: 'Integrate Mem0 with Pipecat for conversational memory in AI agents'
---
+
+
# Pipecat Integration
Mem0 seamlessly integrates with [Pipecat](https://pipecat.ai), providing long-term memory capabilities for conversational AI agents. This integration allows your Pipecat-powered applications to remember past conversations and provide personalized responses based on user history.
diff --git a/docs/integrations/vercel-ai-sdk.mdx b/docs/integrations/vercel-ai-sdk.mdx
index 47e8cef4..11383dcb 100644
--- a/docs/integrations/vercel-ai-sdk.mdx
+++ b/docs/integrations/vercel-ai-sdk.mdx
@@ -2,6 +2,8 @@
title: Vercel AI SDK
---
+
+
The [**Mem0 AI SDK Provider**](https://www.npmjs.com/package/@mem0/vercel-ai-provider) is a library developed by **Mem0** to integrate with the Vercel AI SDK. This library brings enhanced AI interaction capabilities to your applications by introducing persistent memory functionality.
diff --git a/docs/open-source/graph_memory/features.mdx b/docs/open-source/graph_memory/features.mdx
index 60943c09..d96a055e 100644
--- a/docs/open-source/graph_memory/features.mdx
+++ b/docs/open-source/graph_memory/features.mdx
@@ -5,6 +5,8 @@ icon: "list-check"
iconType: "solid"
---
+
+
Graph Memory is a powerful feature that allows users to create and utilize complex relationships between pieces of information.
## Graph Memory supports the following features:
diff --git a/docs/open-source/graph_memory/overview.mdx b/docs/open-source/graph_memory/overview.mdx
index d59f1b36..2ff372d7 100644
--- a/docs/open-source/graph_memory/overview.mdx
+++ b/docs/open-source/graph_memory/overview.mdx
@@ -5,6 +5,8 @@ icon: "database"
iconType: "solid"
---
+
+
Mem0 now supports **Graph Memory**.
With Graph Memory, users can now create and utilize complex relationships between pieces of information, allowing for more nuanced and context-aware responses.
This integration enables users to leverage the strengths of both vector-based and graph-based approaches, resulting in more accurate and comprehensive information retrieval and generation.
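As a sketch of how Graph Memory might be enabled in the open-source SDK, assuming a Neo4j-backed graph store; the connection details are placeholders, not taken from this patch:

```python
from mem0 import Memory

# Assumed graph store configuration; swap in your own Neo4j connection details.
config = {
    "graph_store": {
        "provider": "neo4j",
        "config": {
            "url": "neo4j+s://your-instance.databases.neo4j.io",
            "username": "neo4j",
            "password": "your-password",
        },
    },
}

memory = Memory.from_config(config)
memory.add("I work with Alice on the search team.", user_id="bob")
results = memory.search("Who does Bob work with?", user_id="bob")
```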
diff --git a/docs/open-source/multimodal-support.mdx b/docs/open-source/multimodal-support.mdx
index fcf91049..390fd79b 100644
--- a/docs/open-source/multimodal-support.mdx
+++ b/docs/open-source/multimodal-support.mdx
@@ -4,6 +4,8 @@ icon: "image"
iconType: "solid"
---
+
+
Mem0 extends its capabilities beyond text by supporting multimodal data, including images. Users can seamlessly integrate images into their interactions, allowing Mem0 to extract pertinent information from visual content and enrich the memory system.
## How It Works
diff --git a/docs/open-source/node-quickstart.mdx b/docs/open-source/node-quickstart.mdx
index 39f7cbf7..ee9f3791 100644
--- a/docs/open-source/node-quickstart.mdx
+++ b/docs/open-source/node-quickstart.mdx
@@ -5,6 +5,8 @@ icon: "node"
iconType: "solid"
---
+
+
> Welcome to the Mem0 quickstart guide. This guide will help you get up and running with Mem0 in no time.
## Installation
diff --git a/docs/open-source/python-quickstart.mdx b/docs/open-source/python-quickstart.mdx
index e0a90642..c212358f 100644
--- a/docs/open-source/python-quickstart.mdx
+++ b/docs/open-source/python-quickstart.mdx
@@ -5,6 +5,8 @@ icon: "python"
iconType: "solid"
---
+
+
> Welcome to the Mem0 quickstart guide. This guide will help you get up and running with Mem0 in no time.
## Installation
diff --git a/docs/open-source/quickstart.mdx b/docs/open-source/quickstart.mdx
index 3fc5c8f5..a9b8512c 100644
--- a/docs/open-source/quickstart.mdx
+++ b/docs/open-source/quickstart.mdx
@@ -4,6 +4,8 @@ icon: "info"
iconType: "solid"
---
+
+
Welcome to Mem0 Open Source - a powerful, self-hosted memory management solution for AI agents and assistants. With Mem0 OSS, you get full control over your infrastructure while maintaining complete customization flexibility.
We offer two SDKs for Python and Node.js.
diff --git a/docs/overview.mdx b/docs/overview.mdx
index 400dd689..76221fb4 100644
--- a/docs/overview.mdx
+++ b/docs/overview.mdx
@@ -4,6 +4,8 @@ icon: "info"
iconType: "solid"
---
+
+
🎉 We now support [Grok 3](components/llms/models/xAI)! Enhance your AI assistants with the latest and most capable language model from xAI.
diff --git a/docs/platform/overview.mdx b/docs/platform/overview.mdx
index 51d9c1c4..997fa15e 100644
--- a/docs/platform/overview.mdx
+++ b/docs/platform/overview.mdx
@@ -5,6 +5,8 @@ icon: "eye"
iconType: "solid"
---
+
+
## Welcome to Mem0 Platform
The Mem0 Platform is a managed service and the easiest way to add our powerful memory layer to your applications.
diff --git a/docs/platform/quickstart.mdx b/docs/platform/quickstart.mdx
index eaaeccb9..7a2326f8 100644
--- a/docs/platform/quickstart.mdx
+++ b/docs/platform/quickstart.mdx
@@ -5,6 +5,8 @@ icon: "book"
iconType: "solid"
---
+
+
🎉 Looking for TypeScript support? Mem0 has you covered! Check out an example [here](/platform/quickstart/#4-11-working-with-mem0-in-typescript).
diff --git a/docs/quickstart.mdx b/docs/quickstart.mdx
index 5709c99e..a263be1b 100644
--- a/docs/quickstart.mdx
+++ b/docs/quickstart.mdx
@@ -3,6 +3,10 @@ title: Quickstart
icon: "bolt"
iconType: "solid"
---
+
+
+
+
Mem0 offers two powerful ways to leverage our technology: [our managed platform](#mem0-platform-managed-solution) and [our open source solution](#mem0-open-source).
Check out our [Playground](https://mem0.dev/pd-pg) to see Mem0 in action.
diff --git a/evaluation/Makefile b/evaluation/Makefile
new file mode 100644
index 00000000..7f0072e6
--- /dev/null
+++ b/evaluation/Makefile
@@ -0,0 +1,31 @@
+
+# Run the experiments
+run-mem0-add:
+ python run_experiments.py --technique_type mem0 --method add
+
+run-mem0-search:
+ python run_experiments.py --technique_type mem0 --method search --output_folder results/ --top_k 30
+
+run-mem0-plus-add:
+ python run_experiments.py --technique_type mem0 --method add --is_graph
+
+run-mem0-plus-search:
+ python run_experiments.py --technique_type mem0 --method search --is_graph --output_folder results/ --top_k 30
+
+run-rag:
+ python run_experiments.py --technique_type rag --chunk_size 500 --num_chunks 1 --output_folder results/
+
+run-full-context:
+ python run_experiments.py --technique_type rag --chunk_size -1 --num_chunks 1 --output_folder results/
+
+run-langmem:
+ python run_experiments.py --technique_type langmem --output_folder results/
+
+run-zep-add:
+ python run_experiments.py --technique_type zep --method add --output_folder results/
+
+run-zep-search:
+ python run_experiments.py --technique_type zep --method search --output_folder results/
+
+run-openai:
+ python run_experiments.py --technique_type openai --output_folder results/
diff --git a/evaluation/README.md b/evaluation/README.md
index e69de29b..39aba110 100644
--- a/evaluation/README.md
+++ b/evaluation/README.md
@@ -0,0 +1,192 @@
+# Mem0: Building Production‑Ready AI Agents with Scalable Long‑Term Memory
+
+[Paper](https://arxiv.org/abs/XXXX.XXXXX)
+[Research](https://mem0.ai/research)
+
+This repository contains the code and dataset for our paper: **Mem0: Building Production‑Ready AI Agents with Scalable Long‑Term Memory**.
+
+## 📋 Overview
+
+This project evaluates Mem0 and compares it with different memory and retrieval techniques for AI systems:
+
+1. **Established LOCOMO Benchmarks**: We evaluate against five established approaches from the literature: LoCoMo, ReadAgent, MemoryBank, MemGPT, and A-Mem.
+2. **Open-Source Memory Solutions**: We test promising open-source memory architectures including LangMem, which provides flexible memory management capabilities.
+3. **RAG Systems**: We implement Retrieval-Augmented Generation with various configurations, testing different chunk sizes and retrieval counts to optimize performance.
+4. **Full-Context Processing**: We examine the effectiveness of passing the entire conversation history within the context window of the LLM as a baseline approach.
+5. **Proprietary Memory Systems**: We evaluate OpenAI's built-in memory feature available in their ChatGPT interface to compare against commercial solutions.
+6. **Third-Party Memory Providers**: We incorporate Zep, a specialized memory management platform designed for AI agents, to assess the performance of dedicated memory infrastructure.
+
+We test these techniques on the LOCOMO dataset, which contains conversational data with various question types to evaluate memory recall and understanding.
+
+## 🔍 Dataset
+
+The dataset is located in the `dataset/` directory:
+- `locomo10.json`: Original dataset
+- `locomo10_rag.json`: Dataset formatted for RAG experiments
+
+## 📁 Project Structure
+
+```
+.
+├── src/ # Source code for different memory techniques
+│   ├── memzero/          # Implementation of the Mem0 technique
+│ ├── openai/ # Implementation of the OpenAI memory
+│ ├── zep/ # Implementation of the Zep memory
+│ ├── rag.py # Implementation of the RAG technique
+│   └── langmem.py        # Implementation of the LangMem technique
+├── metrics/ # Code for evaluation metrics
+├── results/ # Results of experiments
+├── dataset/ # Dataset files
+├── evals.py # Evaluation script
+├── run_experiments.py # Script to run experiments
+├── generate_scores.py # Script to generate scores from results
+└── prompts.py # Prompts used for the models
+```
+
+## 🚀 Getting Started
+
+### Prerequisites
+
+Create a `.env` file with your API keys and configurations. The following keys are required:
+
+```
+# OpenAI API key for GPT models and embeddings
+OPENAI_API_KEY="your-openai-api-key"
+
+# Mem0 API keys (for Mem0 and Mem0+ techniques)
+MEM0_API_KEY="your-mem0-api-key"
+MEM0_PROJECT_ID="your-mem0-project-id"
+MEM0_ORGANIZATION_ID="your-mem0-organization-id"
+
+# Model configuration
+MODEL="gpt-4o-mini" # or your preferred model
+EMBEDDING_MODEL="text-embedding-3-small" # or your preferred embedding model
+
+# Zep API key (for the Zep technique)
+ZEP_API_KEY="your-zep-api-key"
+```
+
+### Running Experiments
+
+You can run experiments using the provided Makefile commands:
+
+#### Memory Techniques
+
+```bash
+# Run Mem0 experiments
+make run-mem0-add # Add memories using Mem0
+make run-mem0-search # Search memories using Mem0
+
+# Run Mem0+ experiments (with graph-based search)
+make run-mem0-plus-add # Add memories using Mem0+
+make run-mem0-plus-search # Search memories using Mem0+
+
+# Run RAG experiments
+make run-rag # Run RAG with chunk size 500
+make run-full-context # Run RAG with full context
+
+# Run LangMem experiments
+make run-langmem # Run LangMem
+
+# Run Zep experiments
+make run-zep-add # Add memories using Zep
+make run-zep-search # Search memories using Zep
+
+# Run OpenAI experiments
+make run-openai # Run OpenAI experiments
+```
+
+Alternatively, you can run experiments directly with custom parameters:
+
+```bash
+python run_experiments.py --technique_type [mem0|rag|langmem|zep|openai] [additional parameters]
+```
+
+#### Command-line Parameters:
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `--technique_type` | Memory technique to use (mem0, rag, langmem, zep, openai) | mem0 |
+| `--method` | Method to use (add, search) | add |
+| `--chunk_size` | Chunk size for processing | 1000 |
+| `--top_k` | Number of top memories to retrieve | 30 |
+| `--filter_memories` | Whether to filter memories | False |
+| `--is_graph` | Whether to use graph-based search | False |
+| `--num_chunks` | Number of chunks to process for RAG | 1 |
+
+### 📊 Evaluation
+
+To evaluate results, run:
+
+```bash
+python evals.py --input_file [path_to_results] --output_file [output_path]
+```
+
+This script:
+1. Processes each question-answer pair
+2. Calculates BLEU and F1 scores automatically
+3. Uses an LLM judge to evaluate answer correctness
+4. Saves the combined results to the output file
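+
+If you want to sanity-check a single prediction without running the full script, the same helpers can be called directly. This is a minimal sketch, assuming you run it from the `evaluation/` directory with the required packages installed and `OPENAI_API_KEY` set; the question/answer pair is only illustrative:
+
+```python
+from metrics.utils import calculate_metrics, calculate_bleu_scores
+from metrics.llm_judge import evaluate_llm_judge
+
+question = "Do you remember what I got the last time I went to Hawaii?"
+gold = "A shell necklace"
+prediction = "You bought a shell necklace during your Hawaii trip"
+
+f1 = calculate_metrics(prediction, gold)["f1"]            # token-overlap F1
+bleu1 = calculate_bleu_scores(prediction, gold)["bleu1"]  # unigram BLEU
+llm = evaluate_llm_judge(question, gold, prediction)      # 1 if judged CORRECT, else 0
+print(f1, bleu1, llm)
+```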
+
+### 📈 Generating Scores
+
+Generate final scores with:
+
+```bash
+python generate_scores.py
+```
+
+This script:
+1. Loads the evaluation metrics data
+2. Calculates mean scores for each category (BLEU, F1, LLM)
+3. Reports the number of questions per category
+4. Calculates overall mean scores across all categories
+
+Example output:
+```
+Mean Scores Per Category:
+ bleu_score f1_score llm_score count
+category
+1 0.xxxx 0.xxxx 0.xxxx xx
+2 0.xxxx 0.xxxx 0.xxxx xx
+3 0.xxxx 0.xxxx 0.xxxx xx
+
+Overall Mean Scores:
+bleu_score 0.xxxx
+f1_score 0.xxxx
+llm_score 0.xxxx
+```
+
+## 📏 Evaluation Metrics
+
+We use several metrics to evaluate the performance of different memory techniques:
+
+1. **BLEU Score**: Measures the similarity between the model's response and the ground truth
+2. **F1 Score**: Measures the harmonic mean of precision and recall
+3. **LLM Score**: A binary score (0 or 1) determined by an LLM judge evaluating the correctness of responses
+4. **Token Consumption**: Number of tokens required to generate the final answer
+5. **Latency**: Time taken to search memories and to generate the response
+
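+The token-level F1 above is a simple set-overlap measure between the prediction and the ground truth. A simplified sketch of the computation in `metrics/utils.py` (the actual helper also strips basic punctuation before tokenizing):
+
+```python
+def token_f1(prediction: str, reference: str) -> float:
+    # Lowercase and split into token sets
+    pred_tokens = set(prediction.lower().split())
+    ref_tokens = set(reference.lower().split())
+    if not pred_tokens or not ref_tokens:
+        return 0.0
+    common = pred_tokens & ref_tokens
+    precision = len(common) / len(pred_tokens)
+    recall = len(common) / len(ref_tokens)
+    return 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0
+```
+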
+## 📚 Citation
+
+If you use this code or dataset in your research, please cite our paper:
+
+```bibtex
+@article{mem0,
+ title={Mem0: Building Production‑Ready AI Agents with Scalable Long‑Term Memory},
+ author={---},
+ journal={arXiv preprint},
+ year={2025}
+}
+```
+
+## 📄 License
+
+[MIT License](LICENSE)
+
+## 👥 Contributors
+
+- [Prateek Chhikara](https://github.com/prateekchhikara)
+- [Dev Khant](https://github.com/Dev-Khant)
+- [Saket Aryan](https://github.com/whysosaket)
+- [Taranjeet Singh](https://github.com/taranjeet)
+- [Deshraj Yadav](https://github.com/deshraj)
+
diff --git a/evaluation/evals.py b/evaluation/evals.py
new file mode 100644
index 00000000..c13d4c29
--- /dev/null
+++ b/evaluation/evals.py
@@ -0,0 +1,81 @@
+import json
+import argparse
+from metrics.utils import calculate_metrics, calculate_bleu_scores
+from metrics.llm_judge import evaluate_llm_judge
+from collections import defaultdict
+from tqdm import tqdm
+import concurrent.futures
+import threading
+
+
+def process_item(item_data):
+ k, v = item_data
+ local_results = defaultdict(list)
+
+ for item in v:
+ gt_answer = str(item['answer'])
+ pred_answer = str(item['response'])
+ category = str(item['category'])
+ question = str(item['question'])
+
+ # Skip category 5
+ if category == '5':
+ continue
+
+ metrics = calculate_metrics(pred_answer, gt_answer)
+ bleu_scores = calculate_bleu_scores(pred_answer, gt_answer)
+ llm_score = evaluate_llm_judge(question, gt_answer, pred_answer)
+
+ local_results[k].append({
+ "question": question,
+ "answer": gt_answer,
+ "response": pred_answer,
+ "category": category,
+ "bleu_score": bleu_scores["bleu1"],
+ "f1_score": metrics["f1"],
+ "llm_score": llm_score
+ })
+
+ return local_results
+
+
+def main():
+ parser = argparse.ArgumentParser(description='Evaluate RAG results')
+ parser.add_argument('--input_file', type=str,
+ default="results/rag_results_500_k1.json",
+ help='Path to the input dataset file')
+ parser.add_argument('--output_file', type=str,
+ default="evaluation_metrics.json",
+ help='Path to save the evaluation results')
+ parser.add_argument('--max_workers', type=int, default=10,
+ help='Maximum number of worker threads')
+
+ args = parser.parse_args()
+
+ with open(args.input_file, 'r') as f:
+ data = json.load(f)
+
+ results = defaultdict(list)
+ results_lock = threading.Lock()
+
+ # Use ThreadPoolExecutor with specified workers
+ with concurrent.futures.ThreadPoolExecutor(max_workers=args.max_workers) as executor:
+ futures = [executor.submit(process_item, item_data)
+ for item_data in data.items()]
+
+ for future in tqdm(concurrent.futures.as_completed(futures),
+ total=len(futures)):
+ local_results = future.result()
+ with results_lock:
+ for k, items in local_results.items():
+ results[k].extend(items)
+
+ # Save results to JSON file
+ with open(args.output_file, 'w') as f:
+ json.dump(results, f, indent=4)
+
+ print(f"Results saved to {args.output_file}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/evaluation/generate_scores.py b/evaluation/generate_scores.py
new file mode 100644
index 00000000..7fab6d08
--- /dev/null
+++ b/evaluation/generate_scores.py
@@ -0,0 +1,41 @@
+import pandas as pd
+import json
+
+# Load the evaluation metrics data
+with open('evaluation_metrics.json', 'r') as f:
+ data = json.load(f)
+
+# Flatten the data into a list of question items
+all_items = []
+for key in data:
+ all_items.extend(data[key])
+
+# Convert to DataFrame
+df = pd.DataFrame(all_items)
+
+# Convert category to numeric type
+df['category'] = pd.to_numeric(df['category'])
+
+# Calculate mean scores by category
+result = df.groupby('category').agg({
+ 'bleu_score': 'mean',
+ 'f1_score': 'mean',
+ 'llm_score': 'mean'
+}).round(4)
+
+# Add count of questions per category
+result['count'] = df.groupby('category').size()
+
+# Print the results
+print("Mean Scores Per Category:")
+print(result)
+
+# Calculate overall means
+overall_means = df.agg({
+ 'bleu_score': 'mean',
+ 'f1_score': 'mean',
+ 'llm_score': 'mean'
+}).round(4)
+
+print("\nOverall Mean Scores:")
+print(overall_means)
\ No newline at end of file
diff --git a/evaluation/metrics/llm_judge.py b/evaluation/metrics/llm_judge.py
new file mode 100644
index 00000000..d148da7b
--- /dev/null
+++ b/evaluation/metrics/llm_judge.py
@@ -0,0 +1,127 @@
+from openai import OpenAI
+import json
+from collections import defaultdict
+import numpy as np
+import argparse
+
+client = OpenAI()
+
+ACCURACY_PROMPT = """
+Your task is to label an answer to a question as 'CORRECT' or 'WRONG'. You will be given the following data:
+    (1) a question (posed by one user to another user),
+    (2) a 'gold' (ground truth) answer,
+ (3) a generated answer
+which you will score as CORRECT/WRONG.
+
+The point of the question is to ask about something one user should know about the other user based on their prior conversations.
+The gold answer will usually be a concise and short answer that includes the referenced topic, for example:
+Question: Do you remember what I got the last time I went to Hawaii?
+Gold answer: A shell necklace
+The generated answer might be much longer, but you should be generous with your grading - as long as it touches on the same topic as the gold answer, it should be counted as CORRECT.
+
+For time related questions, the gold answer will be a specific date, month, year, etc. The generated answer might be much longer or use relative time references (like "last Tuesday" or "next month"), but you should be generous with your grading - as long as it refers to the same date or time period as the gold answer, it should be counted as CORRECT. Even if the format differs (e.g., "May 7th" vs "7 May"), consider it CORRECT if it's the same date.
+
+Now it’s time for the real question:
+Question: {question}
+Gold answer: {gold_answer}
+Generated answer: {generated_answer}
+
+First, provide a short (one sentence) explanation of your reasoning, then finish with CORRECT or WRONG.
+Do NOT include both CORRECT and WRONG in your response, or it will break the evaluation script.
+
+Just return the label CORRECT or WRONG in a json format with the key as "label".
+"""
+
+def evaluate_llm_judge(question, gold_answer, generated_answer):
+ """Evaluate the generated answer against the gold answer using an LLM judge."""
+ response = client.chat.completions.create(
+ model="gpt-4o-mini",
+ messages=[{
+ "role": "user",
+ "content": ACCURACY_PROMPT.format(
+ question=question,
+ gold_answer=gold_answer,
+ generated_answer=generated_answer
+ )
+ }],
+ response_format={"type": "json_object"},
+ temperature=0.0
+ )
+ label = json.loads(response.choices[0].message.content)['label']
+ return 1 if label == "CORRECT" else 0
+
+
+def main():
+ """Main function to evaluate RAG results using LLM judge."""
+ parser = argparse.ArgumentParser(
+ description='Evaluate RAG results using LLM judge'
+ )
+ parser.add_argument(
+ '--input_file',
+ type=str,
+ default="results/default_run_v4_k30_new_graph.json",
+ help='Path to the input dataset file'
+ )
+
+ args = parser.parse_args()
+
+ dataset_path = args.input_file
+ output_path = f"results/llm_judge_{dataset_path.split('/')[-1]}"
+
+ with open(dataset_path, "r") as f:
+ data = json.load(f)
+
+ LLM_JUDGE = defaultdict(list)
+ RESULTS = defaultdict(list)
+
+ index = 0
+ for k, v in data.items():
+ for x in v:
+ question = x['question']
+ gold_answer = x['answer']
+ generated_answer = x['response']
+ category = x['category']
+
+ # Skip category 5
+ if int(category) == 5:
+ continue
+
+ # Evaluate the answer
+ label = evaluate_llm_judge(question, gold_answer, generated_answer)
+ LLM_JUDGE[category].append(label)
+
+ # Store the results
+ RESULTS[index].append({
+ "question": question,
+ "gt_answer": gold_answer,
+ "response": generated_answer,
+ "category": category,
+ "llm_label": label
+ })
+
+ # Save intermediate results
+ with open(output_path, "w") as f:
+ json.dump(RESULTS, f, indent=4)
+
+ # Print current accuracy for all categories
+ print("All categories accuracy:")
+ for cat, results in LLM_JUDGE.items():
+ if results: # Only print if there are results for this category
+ print(f" Category {cat}: {np.mean(results):.4f} "
+ f"({sum(results)}/{len(results)})")
+ print("------------------------------------------")
+ index += 1
+
+ # Save final results
+ with open(output_path, "w") as f:
+ json.dump(RESULTS, f, indent=4)
+
+ # Print final summary
+ print("PATH: ", dataset_path)
+ print("------------------------------------------")
+ for k, v in LLM_JUDGE.items():
+ print(k, np.mean(v))
+
+
+if __name__ == "__main__":
+ main()
diff --git a/evaluation/metrics/utils.py b/evaluation/metrics/utils.py
new file mode 100644
index 00000000..60fe9001
--- /dev/null
+++ b/evaluation/metrics/utils.py
@@ -0,0 +1,224 @@
+"""
+Borrowed from https://github.com/WujiangXu/AgenticMemory/blob/main/utils.py
+
+@article{xu2025mem,
+ title={A-mem: Agentic memory for llm agents},
+ author={Xu, Wujiang and Liang, Zujie and Mei, Kai and Gao, Hang and Tan, Juntao
+ and Zhang, Yongfeng},
+ journal={arXiv preprint arXiv:2502.12110},
+ year={2025}
+}
+"""
+
+import re
+import string
+import numpy as np
+from typing import List, Dict, Union
+import statistics
+from collections import defaultdict
+from rouge_score import rouge_scorer
+from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
+from bert_score import score as bert_score
+import nltk
+from nltk.translate.meteor_score import meteor_score
+from sentence_transformers import SentenceTransformer
+import logging
+from dataclasses import dataclass
+from pathlib import Path
+from openai import OpenAI
+# from load_dataset import load_locomo_dataset, QA, Turn, Session, Conversation
+from sentence_transformers.util import pytorch_cos_sim
+
+# Download required NLTK data
+try:
+ nltk.download('punkt', quiet=True)
+ nltk.download('wordnet', quiet=True)
+except Exception as e:
+ print(f"Error downloading NLTK data: {e}")
+
+# Initialize SentenceTransformer model (this will be reused)
+try:
+ sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
+except Exception as e:
+ print(f"Warning: Could not load SentenceTransformer model: {e}")
+ sentence_model = None
+
+def simple_tokenize(text):
+ """Simple tokenization function."""
+ # Convert to string if not already
+ text = str(text)
+ return text.lower().replace('.', ' ').replace(',', ' ').replace('!', ' ').replace('?', ' ').split()
+
+def calculate_rouge_scores(prediction: str, reference: str) -> Dict[str, float]:
+ """Calculate ROUGE scores for prediction against reference."""
+ scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
+ scores = scorer.score(reference, prediction)
+ return {
+ 'rouge1_f': scores['rouge1'].fmeasure,
+ 'rouge2_f': scores['rouge2'].fmeasure,
+ 'rougeL_f': scores['rougeL'].fmeasure
+ }
+
+def calculate_bleu_scores(prediction: str, reference: str) -> Dict[str, float]:
+ """Calculate BLEU scores with different n-gram settings."""
+ pred_tokens = nltk.word_tokenize(prediction.lower())
+ ref_tokens = [nltk.word_tokenize(reference.lower())]
+
+ weights_list = [(1, 0, 0, 0), (0.5, 0.5, 0, 0), (0.33, 0.33, 0.33, 0), (0.25, 0.25, 0.25, 0.25)]
+ smooth = SmoothingFunction().method1
+
+ scores = {}
+ for n, weights in enumerate(weights_list, start=1):
+ try:
+ score = sentence_bleu(ref_tokens, pred_tokens, weights=weights, smoothing_function=smooth)
+        except Exception as e:
+            print(f"Error calculating BLEU score: {e}")
+ score = 0.0
+ scores[f'bleu{n}'] = score
+
+ return scores
+
+def calculate_bert_scores(prediction: str, reference: str) -> Dict[str, float]:
+ """Calculate BERTScore for semantic similarity."""
+ try:
+ P, R, F1 = bert_score([prediction], [reference], lang='en', verbose=False)
+ return {
+ 'bert_precision': P.item(),
+ 'bert_recall': R.item(),
+ 'bert_f1': F1.item()
+ }
+ except Exception as e:
+ print(f"Error calculating BERTScore: {e}")
+ return {
+ 'bert_precision': 0.0,
+ 'bert_recall': 0.0,
+ 'bert_f1': 0.0
+ }
+
+def calculate_meteor_score(prediction: str, reference: str) -> float:
+ """Calculate METEOR score for the prediction."""
+ try:
+ return meteor_score([reference.split()], prediction.split())
+ except Exception as e:
+ print(f"Error calculating METEOR score: {e}")
+ return 0.0
+
+def calculate_sentence_similarity(prediction: str, reference: str) -> float:
+ """Calculate sentence embedding similarity using SentenceBERT."""
+ if sentence_model is None:
+ return 0.0
+ try:
+ # Encode sentences
+ embedding1 = sentence_model.encode([prediction], convert_to_tensor=True)
+ embedding2 = sentence_model.encode([reference], convert_to_tensor=True)
+
+ # Calculate cosine similarity
+ similarity = pytorch_cos_sim(embedding1, embedding2).item()
+ return float(similarity)
+ except Exception as e:
+ print(f"Error calculating sentence similarity: {e}")
+ return 0.0
+
+def calculate_metrics(prediction: str, reference: str) -> Dict[str, float]:
+ """Calculate comprehensive evaluation metrics for a prediction."""
+ # Handle empty or None values
+ if not prediction or not reference:
+ return {
+ "exact_match": 0,
+ "f1": 0.0,
+ "rouge1_f": 0.0,
+ "rouge2_f": 0.0,
+ "rougeL_f": 0.0,
+ "bleu1": 0.0,
+ "bleu2": 0.0,
+ "bleu3": 0.0,
+ "bleu4": 0.0,
+ "bert_f1": 0.0,
+ "meteor": 0.0,
+ "sbert_similarity": 0.0
+ }
+
+ # Convert to strings if they're not already
+ prediction = str(prediction).strip()
+ reference = str(reference).strip()
+
+ # Calculate exact match
+ exact_match = int(prediction.lower() == reference.lower())
+
+ # Calculate token-based F1 score
+ pred_tokens = set(simple_tokenize(prediction))
+ ref_tokens = set(simple_tokenize(reference))
+ common_tokens = pred_tokens & ref_tokens
+
+ if not pred_tokens or not ref_tokens:
+ f1 = 0.0
+ else:
+ precision = len(common_tokens) / len(pred_tokens)
+ recall = len(common_tokens) / len(ref_tokens)
+ f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0
+
+    # Calculate scores (ROUGE, BERTScore, METEOR, and SBERT similarity are
+    # currently disabled; re-enable the calls below to include them)
+    rouge_scores = 0  # calculate_rouge_scores(prediction, reference)
+    bleu_scores = calculate_bleu_scores(prediction, reference)
+    bert_scores = 0  # calculate_bert_scores(prediction, reference)
+    meteor = 0  # calculate_meteor_score(prediction, reference)
+    sbert_similarity = 0  # calculate_sentence_similarity(prediction, reference)
+
+ # Combine all metrics
+ metrics = {
+ "exact_match": exact_match,
+ "f1": f1,
+ # **rouge_scores,
+ **bleu_scores,
+ # **bert_scores,
+ # "meteor": meteor,
+ # "sbert_similarity": sbert_similarity
+ }
+
+ return metrics
+
+def aggregate_metrics(all_metrics: List[Dict[str, float]], all_categories: List[int]) -> Dict[str, Dict[str, Union[float, Dict[str, float]]]]:
+ """Calculate aggregate statistics for all metrics, split by category."""
+ if not all_metrics:
+ return {}
+
+ # Initialize aggregates for overall and per-category metrics
+ aggregates = defaultdict(list)
+ category_aggregates = defaultdict(lambda: defaultdict(list))
+
+ # Collect all values for each metric, both overall and per category
+ for metrics, category in zip(all_metrics, all_categories):
+ for metric_name, value in metrics.items():
+ aggregates[metric_name].append(value)
+ category_aggregates[category][metric_name].append(value)
+
+ # Calculate statistics for overall metrics
+ results = {
+ "overall": {}
+ }
+
+ for metric_name, values in aggregates.items():
+ results["overall"][metric_name] = {
+ 'mean': statistics.mean(values),
+ 'std': statistics.stdev(values) if len(values) > 1 else 0.0,
+ 'median': statistics.median(values),
+ 'min': min(values),
+ 'max': max(values),
+ 'count': len(values)
+ }
+
+ # Calculate statistics for each category
+ for category in sorted(category_aggregates.keys()):
+ results[f"category_{category}"] = {}
+ for metric_name, values in category_aggregates[category].items():
+ if values: # Only calculate if we have values for this category
+ results[f"category_{category}"][metric_name] = {
+ 'mean': statistics.mean(values),
+ 'std': statistics.stdev(values) if len(values) > 1 else 0.0,
+ 'median': statistics.median(values),
+ 'min': min(values),
+ 'max': max(values),
+ 'count': len(values)
+ }
+
+ return results
diff --git a/evaluation/prompts.py b/evaluation/prompts.py
new file mode 100644
index 00000000..e591119d
--- /dev/null
+++ b/evaluation/prompts.py
@@ -0,0 +1,147 @@
+ANSWER_PROMPT_GRAPH = """
+ You are an intelligent memory assistant tasked with retrieving accurate information from
+ conversation memories.
+
+ # CONTEXT:
+ You have access to memories from two speakers in a conversation. These memories contain
+ timestamped information that may be relevant to answering the question. You also have
+ access to knowledge graph relations for each user, showing connections between entities,
+ concepts, and events relevant to that user.
+
+ # INSTRUCTIONS:
+ 1. Carefully analyze all provided memories from both speakers
+ 2. Pay special attention to the timestamps to determine the answer
+ 3. If the question asks about a specific event or fact, look for direct evidence in the
+ memories
+ 4. If the memories contain contradictory information, prioritize the most recent memory
+ 5. If there is a question about time references (like "last year", "two months ago",
+ etc.), calculate the actual date based on the memory timestamp. For example, if a
+ memory from 4 May 2022 mentions "went to India last year," then the trip occurred
+ in 2021.
+ 6. Always convert relative time references to specific dates, months, or years. For
+ example, convert "last year" to "2022" or "two months ago" to "March 2023" based
+ on the memory timestamp. Ignore the reference while answering the question.
+ 7. Focus only on the content of the memories from both speakers. Do not confuse
+ character names mentioned in memories with the actual users who created those
+ memories.
+ 8. The answer should be less than 5-6 words.
+ 9. Use the knowledge graph relations to understand the user's knowledge network and
+ identify important relationships between entities in the user's world.
+
+ # APPROACH (Think step by step):
+ 1. First, examine all memories that contain information related to the question
+ 2. Examine the timestamps and content of these memories carefully
+ 3. Look for explicit mentions of dates, times, locations, or events that answer the
+ question
+ 4. If the answer requires calculation (e.g., converting relative time references),
+ show your work
+ 5. Analyze the knowledge graph relations to understand the user's knowledge context
+ 6. Formulate a precise, concise answer based solely on the evidence in the memories
+ 7. Double-check that your answer directly addresses the question asked
+ 8. Ensure your final answer is specific and avoids vague time references
+
+ Memories for user {{speaker_1_user_id}}:
+
+ {{speaker_1_memories}}
+
+ Relations for user {{speaker_1_user_id}}:
+
+ {{speaker_1_graph_memories}}
+
+ Memories for user {{speaker_2_user_id}}:
+
+ {{speaker_2_memories}}
+
+ Relations for user {{speaker_2_user_id}}:
+
+ {{speaker_2_graph_memories}}
+
+ Question: {{question}}
+
+ Answer:
+ """
+
+
+ANSWER_PROMPT = """
+ You are an intelligent memory assistant tasked with retrieving accurate information from conversation memories.
+
+ # CONTEXT:
+ You have access to memories from two speakers in a conversation. These memories contain
+ timestamped information that may be relevant to answering the question.
+
+ # INSTRUCTIONS:
+ 1. Carefully analyze all provided memories from both speakers
+ 2. Pay special attention to the timestamps to determine the answer
+ 3. If the question asks about a specific event or fact, look for direct evidence in the memories
+ 4. If the memories contain contradictory information, prioritize the most recent memory
+ 5. If there is a question about time references (like "last year", "two months ago", etc.),
+ calculate the actual date based on the memory timestamp. For example, if a memory from
+ 4 May 2022 mentions "went to India last year," then the trip occurred in 2021.
+ 6. Always convert relative time references to specific dates, months, or years. For example,
+ convert "last year" to "2022" or "two months ago" to "March 2023" based on the memory
+ timestamp. Ignore the reference while answering the question.
+ 7. Focus only on the content of the memories from both speakers. Do not confuse character
+ names mentioned in memories with the actual users who created those memories.
+ 8. The answer should be less than 5-6 words.
+
+ # APPROACH (Think step by step):
+ 1. First, examine all memories that contain information related to the question
+ 2. Examine the timestamps and content of these memories carefully
+ 3. Look for explicit mentions of dates, times, locations, or events that answer the question
+ 4. If the answer requires calculation (e.g., converting relative time references), show your work
+ 5. Formulate a precise, concise answer based solely on the evidence in the memories
+ 6. Double-check that your answer directly addresses the question asked
+ 7. Ensure your final answer is specific and avoids vague time references
+
+ Memories for user {{speaker_1_user_id}}:
+
+ {{speaker_1_memories}}
+
+ Memories for user {{speaker_2_user_id}}:
+
+ {{speaker_2_memories}}
+
+ Question: {{question}}
+
+ Answer:
+ """
+
+
+ANSWER_PROMPT_ZEP = """
+ You are an intelligent memory assistant tasked with retrieving accurate information from conversation memories.
+
+ # CONTEXT:
+ You have access to memories from a conversation. These memories contain
+ timestamped information that may be relevant to answering the question.
+
+ # INSTRUCTIONS:
+ 1. Carefully analyze all provided memories
+ 2. Pay special attention to the timestamps to determine the answer
+ 3. If the question asks about a specific event or fact, look for direct evidence in the memories
+ 4. If the memories contain contradictory information, prioritize the most recent memory
+ 5. If there is a question about time references (like "last year", "two months ago", etc.),
+ calculate the actual date based on the memory timestamp. For example, if a memory from
+ 4 May 2022 mentions "went to India last year," then the trip occurred in 2021.
+ 6. Always convert relative time references to specific dates, months, or years. For example,
+ convert "last year" to "2022" or "two months ago" to "March 2023" based on the memory
+ timestamp. Ignore the reference while answering the question.
+ 7. Focus only on the content of the memories. Do not confuse character
+ names mentioned in memories with the actual users who created those memories.
+ 8. The answer should be less than 5-6 words.
+
+ # APPROACH (Think step by step):
+ 1. First, examine all memories that contain information related to the question
+ 2. Examine the timestamps and content of these memories carefully
+ 3. Look for explicit mentions of dates, times, locations, or events that answer the question
+ 4. If the answer requires calculation (e.g., converting relative time references), show your work
+ 5. Formulate a precise, concise answer based solely on the evidence in the memories
+ 6. Double-check that your answer directly addresses the question asked
+ 7. Ensure your final answer is specific and avoids vague time references
+
+ Memories:
+
+ {{memories}}
+
+ Question: {{question}}
+ Answer:
+ """
\ No newline at end of file
diff --git a/evaluation/run_experiments.py b/evaluation/run_experiments.py
new file mode 100644
index 00000000..2cbdd92e
--- /dev/null
+++ b/evaluation/run_experiments.py
@@ -0,0 +1,102 @@
+import os
+import json
+from src.memzero.add import MemoryADD
+from src.memzero.search import MemorySearch
+from src.utils import TECHNIQUES, METHODS
+import argparse
+from src.rag import RAGManager
+from src.langmem import LangMemManager
+from src.zep.search import ZepSearch
+from src.zep.add import ZepAdd
+from src.openai.predict import OpenAIPredict
+
+
+class Experiment:
+ def __init__(self, technique_type, chunk_size):
+ self.technique_type = technique_type
+ self.chunk_size = chunk_size
+
+ def run(self):
+ print(f"Running experiment with technique: {self.technique_type}, chunk size: {self.chunk_size}")
+
+
+def main():
+ parser = argparse.ArgumentParser(description='Run memory experiments')
+ parser.add_argument('--technique_type', choices=TECHNIQUES, default='mem0',
+ help='Memory technique to use')
+ parser.add_argument('--method', choices=METHODS, default='add',
+ help='Method to use')
+ parser.add_argument('--chunk_size', type=int, default=1000,
+ help='Chunk size for processing')
+ parser.add_argument('--output_folder', type=str, default='results/',
+ help='Output path for results')
+ parser.add_argument('--top_k', type=int, default=30,
+ help='Number of top memories to retrieve')
+ parser.add_argument('--filter_memories', action='store_true', default=False,
+ help='Whether to filter memories')
+ parser.add_argument('--is_graph', action='store_true', default=False,
+ help='Whether to use graph-based search')
+ parser.add_argument('--num_chunks', type=int, default=1,
+ help='Number of chunks to process')
+
+ args = parser.parse_args()
+
+ # Add your experiment logic here
+ print(f"Running experiments with technique: {args.technique_type}, chunk size: {args.chunk_size}")
+
+ if args.technique_type == "mem0":
+ if args.method == "add":
+ memory_manager = MemoryADD(
+ data_path='dataset/locomo10.json',
+ is_graph=args.is_graph
+ )
+ memory_manager.process_all_conversations()
+ elif args.method == "search":
+ output_file_path = os.path.join(
+ args.output_folder,
+ f"mem0_results_top_{args.top_k}_filter_{args.filter_memories}_graph_{args.is_graph}.json"
+ )
+ memory_searcher = MemorySearch(
+ output_file_path,
+ args.top_k,
+ args.filter_memories,
+ args.is_graph
+ )
+ memory_searcher.process_data_file('dataset/locomo10.json')
+ elif args.technique_type == "rag":
+ output_file_path = os.path.join(
+ args.output_folder,
+ f"rag_results_{args.chunk_size}_k{args.num_chunks}.json"
+ )
+ rag_manager = RAGManager(
+ data_path="dataset/locomo10_rag.json",
+ chunk_size=args.chunk_size,
+ k=args.num_chunks
+ )
+ rag_manager.process_all_conversations(output_file_path)
+ elif args.technique_type == "langmem":
+ output_file_path = os.path.join(args.output_folder, "langmem_results.json")
+ langmem_manager = LangMemManager(dataset_path="dataset/locomo10_rag.json")
+ langmem_manager.process_all_conversations(output_file_path)
+ elif args.technique_type == "zep":
+ if args.method == "add":
+ zep_manager = ZepAdd(data_path="dataset/locomo10.json")
+ zep_manager.process_all_conversations("1")
+ elif args.method == "search":
+ output_file_path = os.path.join(args.output_folder, "zep_search_results.json")
+ zep_manager = ZepSearch()
+ zep_manager.process_data_file(
+ "dataset/locomo10.json",
+ "1",
+ output_file_path
+ )
+ elif args.technique_type == "openai":
+ output_file_path = os.path.join(args.output_folder, "openai_results.json")
+ openai_manager = OpenAIPredict()
+ openai_manager.process_data_file("dataset/locomo10.json", output_file_path)
+ else:
+ raise ValueError(f"Invalid technique type: {args.technique_type}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/evaluation/src/langmem.py b/evaluation/src/langmem.py
new file mode 100644
index 00000000..72478a0c
--- /dev/null
+++ b/evaluation/src/langmem.py
@@ -0,0 +1,193 @@
+from langgraph.checkpoint.memory import MemorySaver
+from langgraph.prebuilt import create_react_agent
+from langgraph.store.memory import InMemoryStore
+from langgraph.utils.config import get_store
+from langmem import (
+ create_manage_memory_tool,
+ create_search_memory_tool
+)
+import time
+import multiprocessing as mp
+import json
+from functools import partial
+import os
+from tqdm import tqdm
+from openai import OpenAI
+from collections import defaultdict
+from dotenv import load_dotenv
+from prompts import ANSWER_PROMPT
+
+load_dotenv()
+
+client = OpenAI()
+
+from jinja2 import Template
+
+ANSWER_PROMPT_TEMPLATE = Template(ANSWER_PROMPT)
+
+
+def get_answer(question, speaker_1_user_id, speaker_1_memories, speaker_2_user_id, speaker_2_memories):
+ prompt = ANSWER_PROMPT_TEMPLATE.render(
+ question=question,
+ speaker_1_user_id=speaker_1_user_id,
+ speaker_1_memories=speaker_1_memories,
+ speaker_2_user_id=speaker_2_user_id,
+ speaker_2_memories=speaker_2_memories
+ )
+
+ t1 = time.time()
+ response = client.chat.completions.create(
+ model=os.getenv("MODEL"),
+ messages=[{"role": "system", "content": prompt}],
+ temperature=0.0
+ )
+ t2 = time.time()
+ return response.choices[0].message.content, t2 - t1
+
+
+def prompt(state):
+ """Prepare the messages for the LLM."""
+ store = get_store()
+ memories = store.search(
+ ("memories",),
+ query=state["messages"][-1].content,
+ )
+ system_msg = f"""You are a helpful assistant.
+
+## Memories
+
+{memories}
+
+"""
+ return [{"role": "system", "content": system_msg}, *state["messages"]]
+
+
+class LangMem:
+    def __init__(self):
+ self.store = InMemoryStore(
+ index={
+ "dims": 1536,
+ "embed": f"openai:{os.getenv('EMBEDDING_MODEL')}",
+ }
+ )
+ self.checkpointer = MemorySaver() # Checkpoint graph state
+
+ self.agent = create_react_agent(
+ f"openai:{os.getenv('MODEL')}",
+ prompt=prompt,
+ tools=[
+ create_manage_memory_tool(namespace=("memories",)),
+ create_search_memory_tool(namespace=("memories",)),
+ ],
+ store=self.store,
+ checkpointer=self.checkpointer,
+ )
+
+ def add_memory(self, message, config):
+ return self.agent.invoke(
+ {"messages": [{"role": "user", "content": message}]},
+ config=config
+ )
+
+ def search_memory(self, query, config):
+ try:
+ t1 = time.time()
+ response = self.agent.invoke(
+ {"messages": [{"role": "user", "content": query}]},
+ config=config
+ )
+ t2 = time.time()
+ return response["messages"][-1].content, t2 - t1
+ except Exception as e:
+ print(f"Error in search_memory: {e}")
+            # t2 is undefined if the agent call raised, so compute the elapsed time here
+            return "", time.time() - t1
+
+
+class LangMemManager:
+ def __init__(self, dataset_path):
+ self.dataset_path = dataset_path
+ with open(self.dataset_path, 'r') as f:
+ self.data = json.load(f)
+
+ def process_all_conversations(self, output_file_path):
+ OUTPUT = defaultdict(list)
+
+ # Process conversations in parallel with multiple workers
+ def process_conversation(key_value_pair):
+ key, value = key_value_pair
+ result = defaultdict(list)
+
+ chat_history = value["conversation"]
+ questions = value["question"]
+
+ agent1 = LangMem()
+ agent2 = LangMem()
+ config = {"configurable": {"thread_id": f"thread-{key}"}}
+ speakers = set()
+
+ # Identify speakers
+ for conv in chat_history:
+ speakers.add(conv['speaker'])
+
+ if len(speakers) != 2:
+ raise ValueError(f"Expected 2 speakers, got {len(speakers)}")
+
+ speaker1 = list(speakers)[0]
+ speaker2 = list(speakers)[1]
+
+ # Add memories for each message
+ for conv in tqdm(chat_history, desc=f"Processing messages {key}", leave=False):
+ message = f"{conv['timestamp']} | {conv['speaker']}: {conv['text']}"
+ if conv['speaker'] == speaker1:
+ agent1.add_memory(message, config)
+ elif conv['speaker'] == speaker2:
+ agent2.add_memory(message, config)
+ else:
+ raise ValueError(f"Expected speaker1 or speaker2, got {conv['speaker']}")
+
+ # Process questions
+ for q in tqdm(questions, desc=f"Processing questions {key}", leave=False):
+ category = q['category']
+
+ if int(category) == 5:
+ continue
+
+ answer = q['answer']
+ question = q['question']
+ response1, speaker1_memory_time = agent1.search_memory(question, config)
+ response2, speaker2_memory_time = agent2.search_memory(question, config)
+
+ generated_answer, response_time = get_answer(
+ question, speaker1, response1, speaker2, response2
+ )
+
+ result[key].append({
+ "question": question,
+ "answer": answer,
+ "response1": response1,
+ "response2": response2,
+ "category": category,
+ "speaker1_memory_time": speaker1_memory_time,
+ "speaker2_memory_time": speaker2_memory_time,
+ "response_time": response_time,
+ 'response': generated_answer
+ })
+
+ return result
+
+ # Use multiprocessing to process conversations in parallel
+ with mp.Pool(processes=10) as pool:
+ results = list(tqdm(
+ pool.imap(process_conversation, list(self.data.items())),
+ total=len(self.data),
+ desc="Processing conversations"
+ ))
+
+ # Combine results from all workers
+ for result in results:
+ for key, items in result.items():
+ OUTPUT[key].extend(items)
+
+ # Save final results
+ with open(output_file_path, 'w') as f:
+ json.dump(OUTPUT, f, indent=4)
diff --git a/evaluation/src/memzero/add.py b/evaluation/src/memzero/add.py
new file mode 100644
index 00000000..5200cba2
--- /dev/null
+++ b/evaluation/src/memzero/add.py
@@ -0,0 +1,141 @@
+from mem0 import MemoryClient
+import json
+import time
+import os
+import threading
+from tqdm import tqdm
+from concurrent.futures import ThreadPoolExecutor
+from dotenv import load_dotenv
+
+load_dotenv()
+
+
+# Custom instructions used to update the Mem0 project before adding memories
+custom_instructions = """
+Generate personal memories that follow these guidelines:
+
+1. Each memory should be self-contained with complete context, including:
+ - The person's name, do not use "user" while creating memories
+ - Personal details (career aspirations, hobbies, life circumstances)
+ - Emotional states and reactions
+ - Ongoing journeys or future plans
+ - Specific dates when events occurred
+
+2. Include meaningful personal narratives focusing on:
+ - Identity and self-acceptance journeys
+ - Family planning and parenting
+ - Creative outlets and hobbies
+ - Mental health and self-care activities
+ - Career aspirations and education goals
+ - Important life events and milestones
+
+3. Make each memory rich with specific details rather than general statements
+ - Include timeframes (exact dates when possible)
+ - Name specific activities (e.g., "charity race for mental health" rather than just "exercise")
+ - Include emotional context and personal growth elements
+
+4. Extract memories only from user messages, not incorporating assistant responses
+
+5. Format each memory as a paragraph with a clear narrative structure that captures the person's experience, challenges, and aspirations
+"""
+
+
+class MemoryADD:
+ def __init__(self, data_path=None, batch_size=2, is_graph=False):
+ self.mem0_client = MemoryClient(
+ api_key=os.getenv("MEM0_API_KEY"),
+ org_id=os.getenv("MEM0_ORGANIZATION_ID"),
+ project_id=os.getenv("MEM0_PROJECT_ID")
+ )
+
+ self.mem0_client.update_project(custom_instructions=custom_instructions)
+ self.batch_size = batch_size
+ self.data_path = data_path
+ self.data = None
+ self.is_graph = is_graph
+ if data_path:
+ self.load_data()
+
+ def load_data(self):
+ with open(self.data_path, 'r') as f:
+ self.data = json.load(f)
+ return self.data
+
+ def add_memory(self, user_id, message, metadata, retries=3):
+ for attempt in range(retries):
+ try:
+ _ = self.mem0_client.add(message, user_id=user_id, version="v2",
+ metadata=metadata, enable_graph=self.is_graph)
+ return
+ except Exception as e:
+ if attempt < retries - 1:
+ time.sleep(1) # Wait before retrying
+ continue
+ else:
+ raise e
+
+ def add_memories_for_speaker(self, speaker, messages, timestamp, desc):
+ for i in tqdm(range(0, len(messages), self.batch_size), desc=desc):
+ batch_messages = messages[i:i+self.batch_size]
+ self.add_memory(speaker, batch_messages, metadata={"timestamp": timestamp})
+
+ def process_conversation(self, item, idx):
+ conversation = item['conversation']
+ speaker_a = conversation['speaker_a']
+ speaker_b = conversation['speaker_b']
+
+ speaker_a_user_id = f"{speaker_a}_{idx}"
+ speaker_b_user_id = f"{speaker_b}_{idx}"
+
+ # delete all memories for the two users
+ self.mem0_client.delete_all(user_id=speaker_a_user_id)
+ self.mem0_client.delete_all(user_id=speaker_b_user_id)
+
+ for key in conversation.keys():
+ if key in ['speaker_a', 'speaker_b'] or "date" in key or "timestamp" in key:
+ continue
+
+ date_time_key = key + "_date_time"
+ timestamp = conversation[date_time_key]
+ chats = conversation[key]
+
+ messages = []
+ messages_reverse = []
+ for chat in chats:
+ if chat['speaker'] == speaker_a:
+ messages.append({"role": "user", "content": f"{speaker_a}: {chat['text']}"})
+ messages_reverse.append({"role": "assistant", "content": f"{speaker_a}: {chat['text']}"})
+ elif chat['speaker'] == speaker_b:
+ messages.append({"role": "assistant", "content": f"{speaker_b}: {chat['text']}"})
+ messages_reverse.append({"role": "user", "content": f"{speaker_b}: {chat['text']}"})
+ else:
+ raise ValueError(f"Unknown speaker: {chat['speaker']}")
+
+ # add memories for the two users on different threads
+ thread_a = threading.Thread(
+ target=self.add_memories_for_speaker,
+ args=(speaker_a_user_id, messages, timestamp, "Adding Memories for Speaker A")
+ )
+ thread_b = threading.Thread(
+ target=self.add_memories_for_speaker,
+ args=(speaker_b_user_id, messages_reverse, timestamp, "Adding Memories for Speaker B")
+ )
+
+ thread_a.start()
+ thread_b.start()
+ thread_a.join()
+ thread_b.join()
+
+ print("Messages added successfully")
+
+ def process_all_conversations(self, max_workers=10):
+ if not self.data:
+ raise ValueError("No data loaded. Please set data_path and call load_data() first.")
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
+ futures = [
+ executor.submit(self.process_conversation, item, idx)
+ for idx, item in enumerate(self.data)
+ ]
+
+ for future in futures:
+ future.result()
\ No newline at end of file
diff --git a/evaluation/src/memzero/search.py b/evaluation/src/memzero/search.py
new file mode 100644
index 00000000..68d62f4d
--- /dev/null
+++ b/evaluation/src/memzero/search.py
@@ -0,0 +1,189 @@
+from collections import defaultdict
+from concurrent.futures import ThreadPoolExecutor
+from tqdm import tqdm
+from mem0 import MemoryClient
+import json
+import time
+from jinja2 import Template
+from openai import OpenAI
+from prompts import ANSWER_PROMPT_GRAPH, ANSWER_PROMPT
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+
+
+class MemorySearch:
+
+ def __init__(self, output_path='results.json', top_k=10, filter_memories=False, is_graph=False):
+ self.mem0_client = MemoryClient(
+ api_key=os.getenv("MEM0_API_KEY"),
+ org_id=os.getenv("MEM0_ORGANIZATION_ID"),
+ project_id=os.getenv("MEM0_PROJECT_ID")
+ )
+ self.top_k = top_k
+ self.openai_client = OpenAI()
+ self.results = defaultdict(list)
+ self.output_path = output_path
+ self.filter_memories = filter_memories
+ self.is_graph = is_graph
+
+ if self.is_graph:
+ self.ANSWER_PROMPT = ANSWER_PROMPT_GRAPH
+ else:
+ self.ANSWER_PROMPT = ANSWER_PROMPT
+
+ def search_memory(self, user_id, query, max_retries=3, retry_delay=1):
+ start_time = time.time()
+ retries = 0
+ while retries < max_retries:
+ try:
+ if self.is_graph:
+ print("Searching with graph")
+ memories = self.mem0_client.search(query, user_id=user_id, top_k=self.top_k,
+ filter_memories=self.filter_memories, enable_graph=True, output_format='v1.1')
+ else:
+ memories = self.mem0_client.search(query, user_id=user_id, top_k=self.top_k,
+ filter_memories=self.filter_memories)
+ break
+ except Exception as e:
+ print("Retrying...")
+ retries += 1
+ if retries >= max_retries:
+ raise e
+ time.sleep(retry_delay)
+
+ end_time = time.time()
+ if not self.is_graph:
+ semantic_memories = [{'memory': memory['memory'],
+ 'timestamp': memory['metadata']['timestamp'],
+ 'score': round(memory['score'], 2)}
+ for memory in memories]
+ graph_memories = None
+ else:
+ semantic_memories = [{'memory': memory['memory'],
+ 'timestamp': memory['metadata']['timestamp'],
+ 'score': round(memory['score'], 2)} for memory in memories['results']]
+ graph_memories = [{"source": relation['source'], "relationship": relation['relationship'], "target": relation['target']} for relation in memories['relations']]
+ return semantic_memories, graph_memories, end_time - start_time
+
+ def answer_question(self, speaker_1_user_id, speaker_2_user_id, question, answer, category):
+ speaker_1_memories, speaker_1_graph_memories, speaker_1_memory_time = self.search_memory(speaker_1_user_id, question)
+ speaker_2_memories, speaker_2_graph_memories, speaker_2_memory_time = self.search_memory(speaker_2_user_id, question)
+
+ search_1_memory = [f"{item['timestamp']}: {item['memory']}"
+ for item in speaker_1_memories]
+ search_2_memory = [f"{item['timestamp']}: {item['memory']}"
+ for item in speaker_2_memories]
+
+ template = Template(self.ANSWER_PROMPT)
+ answer_prompt = template.render(
+ speaker_1_user_id=speaker_1_user_id.split('_')[0],
+ speaker_2_user_id=speaker_2_user_id.split('_')[0],
+ speaker_1_memories=json.dumps(search_1_memory, indent=4),
+ speaker_2_memories=json.dumps(search_2_memory, indent=4),
+ speaker_1_graph_memories=json.dumps(speaker_1_graph_memories, indent=4),
+ speaker_2_graph_memories=json.dumps(speaker_2_graph_memories, indent=4),
+ question=question
+ )
+
+ t1 = time.time()
+ response = self.openai_client.chat.completions.create(
+ model=os.getenv("MODEL"),
+ messages=[
+ {"role": "system", "content": answer_prompt}
+ ],
+ temperature=0.0
+ )
+ t2 = time.time()
+ response_time = t2 - t1
+ return response.choices[0].message.content, speaker_1_memories, speaker_2_memories, speaker_1_memory_time, speaker_2_memory_time, speaker_1_graph_memories, speaker_2_graph_memories, response_time
+
+ def process_question(self, val, speaker_a_user_id, speaker_b_user_id):
+ question = val.get('question', '')
+ answer = val.get('answer', '')
+ category = val.get('category', -1)
+ evidence = val.get('evidence', [])
+ adversarial_answer = val.get('adversarial_answer', '')
+
+ response, speaker_1_memories, speaker_2_memories, speaker_1_memory_time, speaker_2_memory_time, speaker_1_graph_memories, speaker_2_graph_memories, response_time = self.answer_question(
+ speaker_a_user_id,
+ speaker_b_user_id,
+ question,
+ answer,
+ category
+ )
+
+ result = {
+ "question": question,
+ "answer": answer,
+ "category": category,
+ "evidence": evidence,
+ "response": response,
+ "adversarial_answer": adversarial_answer,
+ "speaker_1_memories": speaker_1_memories,
+ "speaker_2_memories": speaker_2_memories,
+ 'num_speaker_1_memories': len(speaker_1_memories),
+ 'num_speaker_2_memories': len(speaker_2_memories),
+ 'speaker_1_memory_time': speaker_1_memory_time,
+ 'speaker_2_memory_time': speaker_2_memory_time,
+ "speaker_1_graph_memories": speaker_1_graph_memories,
+ "speaker_2_graph_memories": speaker_2_graph_memories,
+ "response_time": response_time
+ }
+
+ # Save results after each question is processed
+ with open(self.output_path, 'w') as f:
+ json.dump(self.results, f, indent=4)
+
+ return result
+
+ def process_data_file(self, file_path):
+ with open(file_path, 'r') as f:
+ data = json.load(f)
+
+ for idx, item in tqdm(enumerate(data), total=len(data), desc="Processing conversations"):
+ qa = item['qa']
+ conversation = item['conversation']
+ speaker_a = conversation['speaker_a']
+ speaker_b = conversation['speaker_b']
+
+ speaker_a_user_id = f"{speaker_a}_{idx}"
+ speaker_b_user_id = f"{speaker_b}_{idx}"
+
+ for question_item in tqdm(qa, total=len(qa), desc=f"Processing questions for conversation {idx}", leave=False):
+ result = self.process_question(
+ question_item,
+ speaker_a_user_id,
+ speaker_b_user_id
+ )
+ self.results[idx].append(result)
+
+ # Save results after each question is processed
+ with open(self.output_path, 'w') as f:
+ json.dump(self.results, f, indent=4)
+
+ # Final save at the end
+ with open(self.output_path, 'w') as f:
+ json.dump(self.results, f, indent=4)
+
+ def process_questions_parallel(self, qa_list, speaker_a_user_id, speaker_b_user_id, max_workers=1):
+ def process_single_question(val):
+ result = self.process_question(val, speaker_a_user_id, speaker_b_user_id)
+ # Save results after each question is processed
+ with open(self.output_path, 'w') as f:
+ json.dump(self.results, f, indent=4)
+ return result
+
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
+ results = list(tqdm(
+ executor.map(process_single_question, qa_list),
+ total=len(qa_list),
+ desc="Answering Questions"
+ ))
+
+ # Final save at the end
+ with open(self.output_path, 'w') as f:
+ json.dump(self.results, f, indent=4)
+
+ return results
diff --git a/evaluation/src/openai/predict.py b/evaluation/src/openai/predict.py
new file mode 100644
index 00000000..bc1d3440
--- /dev/null
+++ b/evaluation/src/openai/predict.py
@@ -0,0 +1,143 @@
+from openai import OpenAI
+import os
+import json
+from jinja2 import Template
+from tqdm import tqdm
+import time
+from collections import defaultdict
+from dotenv import load_dotenv
+import argparse
+
+load_dotenv()
+
+
+ANSWER_PROMPT = """
+ You are an intelligent memory assistant tasked with retrieving accurate information from conversation memories.
+
+ # CONTEXT:
+ You have access to memories from a conversation. These memories contain
+ timestamped information that may be relevant to answering the question.
+
+ # INSTRUCTIONS:
+ 1. Carefully analyze all provided memories
+ 2. Pay special attention to the timestamps to determine the answer
+ 3. If the question asks about a specific event or fact, look for direct evidence in the memories
+ 4. If the memories contain contradictory information, prioritize the most recent memory
+ 5. If there is a question about time references (like "last year", "two months ago", etc.),
+ calculate the actual date based on the memory timestamp. For example, if a memory from
+ 4 May 2022 mentions "went to India last year," then the trip occurred in 2021.
+ 6. Always convert relative time references to specific dates, months, or years. For example,
+ convert "last year" to "2022" or "two months ago" to "March 2023" based on the memory
+ timestamp. Ignore the reference while answering the question.
+ 7. Focus only on the content of the memories. Do not confuse character
+ names mentioned in memories with the actual users who created those memories.
+ 8. The answer should be less than 5-6 words.
+
+ # APPROACH (Think step by step):
+ 1. First, examine all memories that contain information related to the question
+ 2. Examine the timestamps and content of these memories carefully
+ 3. Look for explicit mentions of dates, times, locations, or events that answer the question
+ 4. If the answer requires calculation (e.g., converting relative time references), show your work
+ 5. Formulate a precise, concise answer based solely on the evidence in the memories
+ 6. Double-check that your answer directly addresses the question asked
+ 7. Ensure your final answer is specific and avoids vague time references
+
+ Memories:
+
+ {{memories}}
+
+ Question: {{question}}
+ Answer:
+ """
+
+
+class OpenAIPredict:
+ def __init__(self, model="gpt-4o-mini"):
+ self.model = model
+ self.openai_client = OpenAI()
+ self.results = defaultdict(list)
+
+ def search_memory(self, idx):
+
+ with open(f'memories/{idx}.txt', 'r') as file:
+ memories = file.read()
+
+ return memories, 0
+
+ def process_question(self, val, idx):
+ question = val.get('question', '')
+ answer = val.get('answer', '')
+ category = val.get('category', -1)
+ evidence = val.get('evidence', [])
+ adversarial_answer = val.get('adversarial_answer', '')
+
+ response, search_memory_time, response_time, context = self.answer_question(
+ idx,
+ question
+ )
+
+ result = {
+ "question": question,
+ "answer": answer,
+ "category": category,
+ "evidence": evidence,
+ "response": response,
+ "adversarial_answer": adversarial_answer,
+ "search_memory_time": search_memory_time,
+ "response_time": response_time,
+ "context": context
+ }
+
+ return result
+
+ def answer_question(self, idx, question):
+ memories, search_memory_time = self.search_memory(idx)
+
+ template = Template(ANSWER_PROMPT)
+ answer_prompt = template.render(
+ memories=memories,
+ question=question
+ )
+
+ t1 = time.time()
+ response = self.openai_client.chat.completions.create(
+ model=os.getenv("MODEL"),
+ messages=[
+ {"role": "system", "content": answer_prompt}
+ ],
+ temperature=0.0
+ )
+ t2 = time.time()
+ response_time = t2 - t1
+ return response.choices[0].message.content, search_memory_time, response_time, memories
+
+ def process_data_file(self, file_path, output_file_path):
+ with open(file_path, 'r') as f:
+ data = json.load(f)
+
+ for idx, item in tqdm(enumerate(data), total=len(data), desc="Processing conversations"):
+ qa = item['qa']
+
+ for question_item in tqdm(qa, total=len(qa), desc=f"Processing questions for conversation {idx}", leave=False):
+ result = self.process_question(
+ question_item,
+ idx
+ )
+ self.results[idx].append(result)
+
+ # Save results after each question is processed
+ with open(output_file_path, 'w') as f:
+ json.dump(self.results, f, indent=4)
+
+ # Final save at the end
+ with open(output_file_path, 'w') as f:
+ json.dump(self.results, f, indent=4)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--output_file_path", type=str, required=True)
+ args = parser.parse_args()
+ openai_predict = OpenAIPredict()
+ openai_predict.process_data_file("../../dataset/locomo10.json", args.output_file_path)
+
diff --git a/evaluation/src/rag.py b/evaluation/src/rag.py
new file mode 100644
index 00000000..99814d5a
--- /dev/null
+++ b/evaluation/src/rag.py
@@ -0,0 +1,197 @@
+from openai import OpenAI
+import json
+import numpy as np
+from tqdm import tqdm
+from jinja2 import Template
+import tiktoken
+import time
+from collections import defaultdict
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+
+PROMPT = """
+# Question:
+{{QUESTION}}
+
+# Context:
+{{CONTEXT}}
+
+# Short answer:
+"""
+
+
+class RAGManager:
+ def __init__(self, data_path="dataset/locomo10_rag.json", chunk_size=500, k=1):
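+        # chunk_size == -1 disables chunking (the whole conversation is used as context);
+        # k is the number of top-matching chunks concatenated at query time.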
+ self.model = os.getenv("MODEL")
+ self.client = OpenAI()
+ self.data_path = data_path
+ self.chunk_size = chunk_size
+ self.k = k
+
+ def generate_response(self, question, context):
+ template = Template(PROMPT)
+ prompt = template.render(
+ CONTEXT=context,
+ QUESTION=question
+ )
+
+ max_retries = 3
+ retries = 0
+
+ while retries <= max_retries:
+ try:
+ t1 = time.time()
+ response = self.client.chat.completions.create(
+ model=self.model,
+ messages=[
+                    {"role": "system",
+                     "content": "You are a helpful assistant that can answer "
+                                "questions based on the provided context. "
+                                "If the question involves timing, use the conversation date for reference. "
+                                "Provide the shortest possible answer. "
+                                "Use words directly from the conversation when possible. "
+                                "Avoid using subjects in your answer."},
+ {"role": "user", "content": prompt}
+ ],
+ temperature=0
+ )
+ t2 = time.time()
+ return response.choices[0].message.content.strip(), t2-t1
+ except Exception as e:
+ retries += 1
+ if retries > max_retries:
+ raise e
+ time.sleep(1) # Wait before retrying
+
+ def clean_chat_history(self, chat_history):
+ cleaned_chat_history = ""
+ for c in chat_history:
+ cleaned_chat_history += (f"{c['timestamp']} | {c['speaker']}: "
+ f"{c['text']}\n")
+
+ return cleaned_chat_history
+
+ def calculate_embedding(self, document):
+ response = self.client.embeddings.create(
+ model=os.getenv("EMBEDDING_MODEL"),
+ input=document
+ )
+ return response.data[0].embedding
+
+ def calculate_similarity(self, embedding1, embedding2):
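+        # Cosine similarity between the two embedding vectors.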
+ return np.dot(embedding1, embedding2) / (
+ np.linalg.norm(embedding1) * np.linalg.norm(embedding2))
+
+ def search(self, query, chunks, embeddings, k=1):
+ """
+ Search for the top-k most similar chunks to the query.
+
+ Args:
+ query: The query string
+ chunks: List of text chunks
+ embeddings: List of embeddings for each chunk
+ k: Number of top chunks to return (default: 1)
+
+ Returns:
+ combined_chunks: The combined text of the top-k chunks
+ search_time: Time taken for the search
+ """
+ t1 = time.time()
+ query_embedding = self.calculate_embedding(query)
+ similarities = [
+ self.calculate_similarity(query_embedding, embedding)
+ for embedding in embeddings
+ ]
+
+ # Get indices of top-k most similar chunks
+ if k == 1:
+ # Original behavior - just get the most similar chunk
+ top_indices = [np.argmax(similarities)]
+ else:
+ # Get indices of top-k chunks
+ top_indices = np.argsort(similarities)[-k:][::-1]
+
+ # Combine the top-k chunks
+ combined_chunks = "\n<->\n".join([chunks[i] for i in top_indices])
+
+ t2 = time.time()
+ return combined_chunks, t2-t1
+
+ def create_chunks(self, chat_history, chunk_size=500):
+ """
+ Create chunks using tiktoken for more accurate token counting
+ """
+ # Get the encoding for the model
+ encoding = tiktoken.encoding_for_model(os.getenv("EMBEDDING_MODEL"))
+
+ documents = self.clean_chat_history(chat_history)
+
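+        # With chunking disabled, return the full conversation as a single chunk and no embeddings.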
+ if chunk_size == -1:
+ return [documents], []
+
+ chunks = []
+
+ # Encode the document
+ tokens = encoding.encode(documents)
+
+ # Split into chunks based on token count
+ for i in range(0, len(tokens), chunk_size):
+ chunk_tokens = tokens[i:i+chunk_size]
+ chunk = encoding.decode(chunk_tokens)
+ chunks.append(chunk)
+
+ embeddings = []
+ for chunk in chunks:
+ embedding = self.calculate_embedding(chunk)
+ embeddings.append(embedding)
+
+ return chunks, embeddings
+
+ def process_all_conversations(self, output_file_path):
+ with open(self.data_path, "r") as f:
+ data = json.load(f)
+
+ FINAL_RESULTS = defaultdict(list)
+ for key, value in tqdm(data.items(), desc="Processing conversations"):
+ chat_history = value["conversation"]
+ questions = value["question"]
+
+ chunks, embeddings = self.create_chunks(
+ chat_history, self.chunk_size
+ )
+
+ for item in tqdm(
+ questions, desc="Answering questions", leave=False
+ ):
+ question = item["question"]
+ answer = item.get("answer", "")
+ category = item["category"]
+
+ if self.chunk_size == -1:
+ context = chunks[0]
+ search_time = 0
+ else:
+ context, search_time = self.search(
+ question, chunks, embeddings, k=self.k
+ )
+ response, response_time = self.generate_response(
+ question, context
+ )
+
+                FINAL_RESULTS[key].append({
+                    "question": question,
+                    "answer": answer,
+                    "category": category,
+                    "context": context,
+                    "response": response,
+                    "search_time": search_time,
+                    "response_time": response_time,
+                })
+
+                # Save intermediate results after each question is processed
+                with open(output_file_path, "w+") as f:
+                    json.dump(FINAL_RESULTS, f, indent=4)
+
+ # Save results
+ with open(output_file_path, "w+") as f:
+ json.dump(FINAL_RESULTS, f, indent=4)
diff --git a/evaluation/src/utils.py b/evaluation/src/utils.py
new file mode 100644
index 00000000..b8f5ecf5
--- /dev/null
+++ b/evaluation/src/utils.py
@@ -0,0 +1,12 @@
+TECHNIQUES = [
+ "mem0",
+ "rag",
+ "langmem",
+ "zep",
+ "openai"
+]
+
+METHODS = [
+ "add",
+ "search"
+]
diff --git a/evaluation/src/zep/add.py b/evaluation/src/zep/add.py
new file mode 100644
index 00000000..48ac2f19
--- /dev/null
+++ b/evaluation/src/zep/add.py
@@ -0,0 +1,73 @@
+import argparse
+import json
+import os
+from dotenv import load_dotenv
+from tqdm import tqdm
+from zep_cloud import Message
+from zep_cloud.client import Zep
+
+load_dotenv()
+
+
+class ZepAdd:
+ def __init__(self, data_path=None):
+ self.zep_client = Zep(api_key=os.getenv("ZEP_API_KEY"))
+ self.data_path = data_path
+ self.data = None
+ if data_path:
+ self.load_data()
+
+ def load_data(self):
+ with open(self.data_path, 'r') as f:
+ self.data = json.load(f)
+ return self.data
+
+ def process_conversation(self, run_id, item, idx):
+ conversation = item['conversation']
+
+ user_id = f"run_id_{run_id}_experiment_user_{idx}"
+ session_id = f"run_id_{run_id}_experiment_session_{idx}"
+
+ # # delete all memories for the two users
+ # self.zep_client.user.delete(user_id=user_id)
+ # self.zep_client.memory.delete(session_id=session_id)
+
+ self.zep_client.user.add(user_id=user_id)
+ self.zep_client.memory.add_session(
+ user_id=user_id,
+ session_id=session_id,
+ )
+
+ print("Starting to add memories... for user", user_id)
+ for key in tqdm(conversation.keys(), desc=f"Processing user {user_id}"):
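+            # Skip the speaker metadata and *_date_time keys; only the session keys hold chat turns.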
+ if key in ['speaker_a', 'speaker_b'] or "date" in key:
+ continue
+
+ date_time_key = key + "_date_time"
+ timestamp = conversation[date_time_key]
+ chats = conversation[key]
+
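+            # Prefix every message with its session timestamp so the stored memory keeps temporal context.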
+ for chat in tqdm(chats, desc=f"Adding chats for {key}", leave=False):
+ self.zep_client.memory.add(
+ session_id=session_id,
+ messages=[Message(
+ role=chat['speaker'],
+ role_type="user",
+ content=f"{timestamp}: {chat['text']}",
+ )]
+ )
+
+ def process_all_conversations(self, run_id):
+ if not self.data:
+ raise ValueError("No data loaded. Please set data_path and call load_data() first.")
+        for idx, item in tqdm(enumerate(self.data), total=len(self.data), desc="Processing conversations"):
+            self.process_conversation(run_id, item, idx)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--run_id", type=str, required=True)
+ args = parser.parse_args()
+ zep_add = ZepAdd(data_path="../../dataset/locomo10.json")
+ zep_add.process_all_conversations(args.run_id)
\ No newline at end of file
diff --git a/evaluation/src/zep/search.py b/evaluation/src/zep/search.py
new file mode 100644
index 00000000..7811cab1
--- /dev/null
+++ b/evaluation/src/zep/search.py
@@ -0,0 +1,148 @@
+import argparse
+from collections import defaultdict
+from dotenv import load_dotenv
+from jinja2 import Template
+from openai import OpenAI
+from tqdm import tqdm
+from zep_cloud import EntityEdge, EntityNode
+from zep_cloud.client import Zep
+import json
+import os
+import time
+from prompts import ANSWER_PROMPT_ZEP
+
+load_dotenv()
+
+TEMPLATE = """
+FACTS and ENTITIES represent relevant context to the current conversation.
+
+# These are the most relevant facts and their valid date ranges
+# format: FACT (Date range: from - to)
+
+{facts}
+
+
+# These are the most relevant entities
+# ENTITY_NAME: entity summary
+
+{entities}
+
+"""
+
+
+class ZepSearch:
+ def __init__(self):
+ self.zep_client = Zep(api_key=os.getenv("ZEP_API_KEY"))
+ self.results = defaultdict(list)
+ self.openai_client = OpenAI()
+
+ def format_edge_date_range(self, edge: EntityEdge) -> str:
+ # return f"{datetime(edge.valid_at).strftime('%Y-%m-%d %H:%M:%S') if edge.valid_at else 'date unknown'} - {(edge.invalid_at.strftime('%Y-%m-%d %H:%M:%S') if edge.invalid_at else 'present')}"
+ return f"{edge.valid_at if edge.valid_at else 'date unknown'} - {(edge.invalid_at if edge.invalid_at else 'present')}"
+
+ def compose_search_context(self, edges: list[EntityEdge], nodes: list[EntityNode]) -> str:
+ facts = [f' - {edge.fact} ({self.format_edge_date_range(edge)})' for edge in edges]
+ entities = [f' - {node.name}: {node.summary}' for node in nodes]
+ return TEMPLATE.format(facts='\n'.join(facts), entities='\n'.join(entities))
+
+ def search_memory(self, run_id, idx, query, max_retries=3, retry_delay=1):
+ start_time = time.time()
+ retries = 0
+ while retries < max_retries:
+ try:
+ user_id = f"run_id_{run_id}_experiment_user_{idx}"
+ session_id = f"run_id_{run_id}_experiment_session_{idx}"
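+                # Query the Zep graph twice: edges (facts) reranked with a cross-encoder and
+                # nodes (entities) reranked with RRF, each limited to the top 20 results.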
+ edges_results = (self.zep_client.graph.search(user_id=user_id, reranker='cross_encoder', query=query, scope='edges', limit=20)).edges
+ node_results = (self.zep_client.graph.search(user_id=user_id, reranker='rrf', query=query, scope='nodes', limit=20)).nodes
+ context = self.compose_search_context(edges_results, node_results)
+ break
+ except Exception as e:
+                print(f"Search failed with error: {e}. Retrying...")
+ retries += 1
+ if retries >= max_retries:
+ raise e
+ time.sleep(retry_delay)
+
+ end_time = time.time()
+
+ return context, end_time - start_time
+
+ def process_question(self, run_id, val, idx):
+ question = val.get('question', '')
+ answer = val.get('answer', '')
+ category = val.get('category', -1)
+ evidence = val.get('evidence', [])
+ adversarial_answer = val.get('adversarial_answer', '')
+
+ response, search_memory_time, response_time, context = self.answer_question(
+ run_id,
+ idx,
+ question
+ )
+
+ result = {
+ "question": question,
+ "answer": answer,
+ "category": category,
+ "evidence": evidence,
+ "response": response,
+ "adversarial_answer": adversarial_answer,
+ "search_memory_time": search_memory_time,
+ "response_time": response_time,
+ "context": context
+ }
+
+ return result
+
+ def answer_question(self, run_id, idx, question):
+ context, search_memory_time = self.search_memory(run_id, idx, question)
+
+ template = Template(ANSWER_PROMPT_ZEP)
+ answer_prompt = template.render(
+ memories=context,
+ question=question
+ )
+
+ t1 = time.time()
+ response = self.openai_client.chat.completions.create(
+ model=os.getenv("MODEL"),
+ messages=[
+ {"role": "system", "content": answer_prompt}
+ ],
+ temperature=0.0
+ )
+ t2 = time.time()
+ response_time = t2 - t1
+ return response.choices[0].message.content, search_memory_time, response_time, context
+
+ def process_data_file(self, file_path, run_id, output_file_path):
+ with open(file_path, 'r') as f:
+ data = json.load(f)
+
+ for idx, item in tqdm(enumerate(data), total=len(data), desc="Processing conversations"):
+ qa = item['qa']
+
+ for question_item in tqdm(qa, total=len(qa), desc=f"Processing questions for conversation {idx}", leave=False):
+ result = self.process_question(
+ run_id,
+ question_item,
+ idx
+ )
+ self.results[idx].append(result)
+
+ # Save results after each question is processed
+ with open(output_file_path, 'w') as f:
+ json.dump(self.results, f, indent=4)
+
+ # Final save at the end
+ with open(output_file_path, 'w') as f:
+ json.dump(self.results, f, indent=4)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--run_id", type=str, required=True)
+ args = parser.parse_args()
+ zep_search = ZepSearch()
+ zep_search.process_data_file("../../dataset/locomo10.json", args.run_id, "results/zep_search_results.json")