Add support for procedural memory (#2460)

This commit is contained in:
Deshraj Yadav
2025-03-29 15:58:12 -07:00
committed by GitHub
parent 2bf9286071
commit 72bb631bb5
10 changed files with 158 additions and 20 deletions

View File

@@ -2,5 +2,5 @@ import importlib.metadata
__version__ = importlib.metadata.version("mem0ai")
from mem0.client.main import MemoryClient, AsyncMemoryClient # noqa
from mem0.client.main import AsyncMemoryClient, MemoryClient # noqa
from mem0.memory.main import Memory # noqa

7
mem0/configs/enums.py Normal file
View File

@@ -0,0 +1,7 @@
from enum import Enum
class MemoryType(Enum):
    """Categories of memories supported by mem0.

    The string value is what gets persisted: ``PROCEDURAL.value`` is stored
    in a memory's ``memory_type`` metadata field and is the value callers
    pass to ``Memory.add(memory_type=...)`` to request a procedural memory.
    """

    SEMANTIC = "semantic_memory"  # presumably general factual knowledge — confirm against docs
    EPISODIC = "episodic_memory"  # presumably event/conversation-specific memories — confirm against docs
    PROCEDURAL = "procedural_memory"  # agent execution-history summaries (see _create_procedural_memory)

View File

@@ -208,6 +208,83 @@ Please note to return the IDs in the output from the input IDs only and do not g
}
"""
# System prompt for procedural memory creation: instructs the LLM to condense an
# agent's step-by-step execution history into a structured summary in which every
# agent output is preserved verbatim. Used as the default prompt by
# Memory._create_procedural_memory when no custom prompt is supplied.
# FIX: "the agents execution history" -> "the agent's execution history".
PROCEDURAL_MEMORY_SYSTEM_PROMPT = """
You are a memory summarization system that records and preserves the complete interaction history between a human and an AI agent. You are provided with the agent's execution history over the past N steps. Your task is to produce a comprehensive summary of the agent's output history that contains every detail necessary for the agent to continue the task without ambiguity. **Every output produced by the agent must be recorded verbatim as part of the summary.**
### Overall Structure:
- **Overview (Global Metadata):**
- **Task Objective**: The overall goal the agent is working to accomplish.
- **Progress Status**: The current completion percentage and summary of specific milestones or steps completed.
- **Sequential Agent Actions (Numbered Steps):**
Each numbered step must be a self-contained entry that includes all of the following elements:
1. **Agent Action**:
- Precisely describe what the agent did (e.g., "Clicked on the 'Blog' link", "Called API to fetch content", "Scraped page data").
- Include all parameters, target elements, or methods involved.
2. **Action Result (Mandatory, Unmodified)**:
- Immediately follow the agent action with its exact, unaltered output.
- Record all returned data, responses, HTML snippets, JSON content, or error messages exactly as received. This is critical for constructing the final output later.
3. **Embedded Metadata**:
For the same numbered step, include additional context such as:
- **Key Findings**: Any important information discovered (e.g., URLs, data points, search results).
- **Navigation History**: For browser agents, detail which pages were visited, including their URLs and relevance.
- **Errors & Challenges**: Document any error messages, exceptions, or challenges encountered along with any attempted recovery or troubleshooting.
- **Current Context**: Describe the state after the action (e.g., "Agent is on the blog detail page" or "JSON data stored for further processing") and what the agent plans to do next.
### Guidelines:
1. **Preserve Every Output**: The exact output of each agent action is essential. Do not paraphrase or summarize the output. It must be stored as is for later use.
2. **Chronological Order**: Number the agent actions sequentially in the order they occurred. Each numbered step is a complete record of that action.
3. **Detail and Precision**:
- Use exact data: Include URLs, element indexes, error messages, JSON responses, and any other concrete values.
- Preserve numeric counts and metrics (e.g., "3 out of 5 items processed").
- For any errors, include the full error message and, if applicable, the stack trace or cause.
4. **Output Only the Summary**: The final output must consist solely of the structured summary with no additional commentary or preamble.
### Example Template:
```
**Task Objective**: Scrape blog post titles and full content from the OpenAI blog.
**Progress Status**: 10% complete — 5 out of 50 blog posts processed.
1. **Agent Action**: Opened URL "https://openai.com"
**Action Result**:
"HTML Content of the homepage including navigation bar with links: 'Blog', 'API', 'ChatGPT', etc."
**Key Findings**: Navigation bar loaded correctly.
**Navigation History**: Visited homepage: "https://openai.com"
**Current Context**: Homepage loaded; ready to click on the 'Blog' link.
2. **Agent Action**: Clicked on the "Blog" link in the navigation bar.
**Action Result**:
"Navigated to 'https://openai.com/blog/' with the blog listing fully rendered."
**Key Findings**: Blog listing shows 10 blog previews.
**Navigation History**: Transitioned from homepage to blog listing page.
**Current Context**: Blog listing page displayed.
3. **Agent Action**: Extracted the first 5 blog post links from the blog listing page.
**Action Result**:
"[ '/blog/chatgpt-updates', '/blog/ai-and-education', '/blog/openai-api-announcement', '/blog/gpt-4-release', '/blog/safety-and-alignment' ]"
**Key Findings**: Identified 5 valid blog post URLs.
**Current Context**: URLs stored in memory for further processing.
4. **Agent Action**: Visited URL "https://openai.com/blog/chatgpt-updates"
**Action Result**:
"HTML content loaded for the blog post including full article text."
**Key Findings**: Extracted blog title "ChatGPT Updates March 2025" and article content excerpt.
**Current Context**: Blog post content extracted and stored.
5. **Agent Action**: Extracted blog title and full article content from "https://openai.com/blog/chatgpt-updates"
**Action Result**:
"{ 'title': 'ChatGPT Updates March 2025', 'content': 'We\'re introducing new updates to ChatGPT, including improved browsing capabilities and memory recall... (full content)' }"
**Key Findings**: Full content captured for later summarization.
**Current Context**: Data stored; ready to proceed to next blog post.
... (Additional numbered steps for subsequent actions)
```
"""
def get_update_memory_messages(retrieved_old_memory_dict, response_content, custom_update_memory_prompt=None):
if custom_update_memory_prompt is None:

View File

@@ -1,4 +1,5 @@
from typing import Any, Dict, Optional
from pydantic import BaseModel, Field, model_validator

View File

@@ -1,5 +1,5 @@
from typing import Any, Dict, Optional
from enum import Enum
from typing import Any, Dict, Optional
from pydantic import BaseModel, Field, model_validator

View File

@@ -1,4 +1,5 @@
from typing import Any, ClassVar, Dict, Optional
from pydantic import BaseModel, Field, model_validator

View File

@@ -11,17 +11,15 @@ import pytz
from pydantic import ValidationError
from mem0.configs.base import MemoryConfig, MemoryItem
from mem0.configs.prompts import get_update_memory_messages
from mem0.configs.enums import MemoryType
from mem0.configs.prompts import (PROCEDURAL_MEMORY_SYSTEM_PROMPT,
get_update_memory_messages)
from mem0.memory.base import MemoryBase
from mem0.memory.setup import setup_config
from mem0.memory.storage import SQLiteManager
from mem0.memory.telemetry import capture_event
from mem0.memory.utils import (
get_fact_retrieval_messages,
parse_messages,
parse_vision_messages,
remove_code_blocks,
)
from mem0.memory.utils import (get_fact_retrieval_messages, parse_messages,
parse_vision_messages, remove_code_blocks)
from mem0.utils.factory import EmbedderFactory, LlmFactory, VectorStoreFactory
# Setup user config
@@ -89,6 +87,7 @@ class Memory(MemoryBase):
metadata=None,
filters=None,
infer=True,
memory_type=None,
prompt=None,
):
"""
@@ -102,8 +101,8 @@ class Memory(MemoryBase):
metadata (dict, optional): Metadata to store with the memory. Defaults to None.
filters (dict, optional): Filters to apply to the search. Defaults to None.
infer (bool, optional): Whether to infer the memories. Defaults to True.
prompt (str, optional): Prompt to use for memory deduction. Defaults to None.
memory_type (str, optional): Type of memory to create. Defaults to None. By default, it creates the short term memories and long term (semantic and episodic) memories. Pass "procedural_memory" to create procedural memories.
prompt (str, optional): Prompt to use for the memory creation. Defaults to None.
Returns:
dict: A dictionary containing the result of the memory addition operation.
result: dict of affected events with each dict has the following key:
@@ -131,9 +130,18 @@ class Memory(MemoryBase):
if not any(key in filters for key in ("user_id", "agent_id", "run_id")):
raise ValueError("One of the filters: user_id, agent_id or run_id is required!")
if memory_type is not None and memory_type != MemoryType.PROCEDURAL.value:
raise ValueError(
f"Invalid 'memory_type'. Please pass {MemoryType.PROCEDURAL.value} to create procedural memories."
)
if isinstance(messages, str):
messages = [{"role": "user", "content": messages}]
if agent_id is not None and memory_type == MemoryType.PROCEDURAL.value:
results = self._create_procedural_memory(messages, metadata, prompt)
return results
if self.config.llm.config.get("enable_vision"):
messages = parse_vision_messages(messages, self.llm, self.config.llm.config.get("vision_details"))
else:
@@ -595,11 +603,11 @@ class Memory(MemoryBase):
return self.db.get_history(memory_id)
def _create_memory(self, data, existing_embeddings, metadata=None):
logging.info(f"Creating memory with {data=}")
logging.debug(f"Creating memory with {data=}")
if data in existing_embeddings:
embeddings = existing_embeddings[data]
else:
embeddings = self.embedding_model.embed(data, "add")
embeddings = self.embedding_model.embed(data, memory_action="add")
memory_id = str(uuid.uuid4())
metadata = metadata or {}
metadata["data"] = data
@@ -615,6 +623,50 @@ class Memory(MemoryBase):
capture_event("mem0._create_memory", self, {"memory_id": memory_id})
return memory_id
def _create_procedural_memory(self, messages, metadata=None, llm=None, prompt=None):
    """
    Create a procedural memory: an LLM-generated summary of an agent's
    execution history, stored with ``memory_type`` set to
    ``MemoryType.PROCEDURAL.value``.

    Args:
        messages (list): Message dicts (``{"role": ..., "content": ...}``)
            describing the agent's conversation / execution steps.
        metadata (dict, optional): Extra metadata to store with the memory.
            Defaults to None, in which case an empty dict is used.
            NOTE: a non-None dict is mutated in place (``memory_type`` is added).
        llm (optional): A LangChain-compatible chat model used instead of
            ``self.llm`` when provided.
            NOTE(review): ``add()`` appears to call this method with ``prompt``
            passed positionally into this slot — verify call sites.
        prompt (str, optional): Custom system prompt; defaults to
            ``PROCEDURAL_MEMORY_SYSTEM_PROMPT``.

    Returns:
        dict: ``{"results": [{"id": ..., "memory": ..., "event": "ADD"}]}``,
        the same shape ``add()`` returns.

    Raises:
        ImportError: If ``langchain-core`` is not installed.
    """
    try:
        # langchain-core is an optional dependency, needed to adapt plain
        # dict messages for an externally supplied LangChain `llm`.
        from langchain_core.messages.utils import convert_to_messages  # type: ignore
    except ImportError:
        # Narrowed from `except Exception`: only a missing/broken install
        # should be reported as an installation problem.
        logger.error(
            "Import error while loading langchain-core. Please install 'langchain-core' to use procedural memory."
        )
        raise

    logger.info("Creating procedural memory")

    parsed_messages = [
        {"role": "system", "content": prompt or PROCEDURAL_MEMORY_SYSTEM_PROMPT},
        *messages,
        {"role": "user", "content": "Create procedural memory of the above conversation."},
    ]

    try:
        if llm is not None:
            # External LangChain model: convert dicts to LangChain messages.
            parsed_messages = convert_to_messages(parsed_messages)
            response = llm.invoke(messages=parsed_messages)
            procedural_memory = response.content
        else:
            procedural_memory = self.llm.generate_response(messages=parsed_messages)
    except Exception as e:
        logger.error(f"Error generating procedural memory summary: {e}")
        raise

    # BUGFIX: previously this raised ValueError("Metadata cannot be done for
    # procedural memory.") when metadata was None, making the optional
    # parameter unusable. Default to an empty dict instead.
    if metadata is None:
        metadata = {}
    metadata["memory_type"] = MemoryType.PROCEDURAL.value

    # Generate embeddings for the summary.
    embeddings = self.embedding_model.embed(procedural_memory, memory_action="add")
    # Create the memory; the precomputed embedding is passed through so
    # _create_memory does not embed the text a second time.
    memory_id = self._create_memory(procedural_memory, {procedural_memory: embeddings}, metadata=metadata)
    capture_event("mem0._create_procedural_memory", self, {"memory_id": memory_id})

    # Return results in the same format as add().
    return {"results": [{"id": memory_id, "memory": procedural_memory, "event": "ADD"}]}
def _update_memory(self, memory_id, data, existing_embeddings, metadata=None):
logger.info(f"Updating memory with {data=}")

View File

@@ -1,6 +1,6 @@
import sqlite3
import uuid
import threading
import uuid
class SQLiteManager:

View File

@@ -9,8 +9,8 @@ try:
except ImportError:
raise ImportError("The 'vecs' library is required. Please install it using 'pip install vecs'.")
from mem0.configs.vector_stores.supabase import IndexMeasure, IndexMethod
from mem0.vector_stores.base import VectorStoreBase
from mem0.configs.vector_stores.supabase import IndexMethod, IndexMeasure
logger = logging.getLogger(__name__)

View File

@@ -1,6 +1,6 @@
[tool.poetry]
name = "mem0ai"
version = "0.1.79"
version = "0.1.80"
description = "Long-term memory for AI Agents"
authors = ["Mem0 <founders@mem0.ai>"]
exclude = [