Add config option for vertex embedding tasks (#2266)
This commit is contained in:
@@ -27,6 +27,9 @@ class BaseEmbedderConfig(ABC):
|
||||
http_client_proxies: Optional[Union[Dict, str]] = None,
|
||||
# VertexAI specific
|
||||
vertex_credentials_json: Optional[str] = None,
|
||||
memory_add_embedding_type: Optional[str] = None,
|
||||
memory_update_embedding_type: Optional[str] = None,
|
||||
memory_search_embedding_type: Optional[str] = None,
|
||||
):
|
||||
"""
|
||||
Initializes a configuration class instance for the Embeddings.
|
||||
@@ -47,6 +50,14 @@ class BaseEmbedderConfig(ABC):
|
||||
:type azure_kwargs: Optional[Dict[str, Any]], defaults to a dict inside init
|
||||
:param http_client_proxies: The proxy server settings used to create self.http_client, defaults to None
|
||||
:type http_client_proxies: Optional[Dict | str], optional
|
||||
:param vertex_credentials_json: The path to the Vertex AI credentials JSON file, defaults to None
|
||||
:type vertex_credentials_json: Optional[str], optional
|
||||
:param memory_add_embedding_type: The Vertex AI embedding task type to use for the add memory action, defaults to None (the Vertex AI embedder then falls back to "RETRIEVAL_DOCUMENT")
|
||||
:type memory_add_embedding_type: Optional[str], optional
|
||||
:param memory_update_embedding_type: The Vertex AI embedding task type to use for the update memory action, defaults to None (the Vertex AI embedder then falls back to "RETRIEVAL_DOCUMENT")
|
||||
:type memory_update_embedding_type: Optional[str], optional
|
||||
:param memory_search_embedding_type: The Vertex AI embedding task type to use for the search memory action, defaults to None (the Vertex AI embedder then falls back to "RETRIEVAL_QUERY")
|
||||
:type memory_search_embedding_type: Optional[str], optional
|
||||
"""
|
||||
|
||||
self.model = model
|
||||
@@ -68,3 +79,6 @@ class BaseEmbedderConfig(ABC):
|
||||
|
||||
# VertexAI specific
|
||||
self.vertex_credentials_json = vertex_credentials_json
|
||||
self.memory_add_embedding_type = memory_add_embedding_type
|
||||
self.memory_update_embedding_type = memory_update_embedding_type
|
||||
self.memory_search_embedding_type = memory_search_embedding_type
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import os
|
||||
from typing import Optional
|
||||
from typing import Literal, Optional
|
||||
|
||||
from openai import AzureOpenAI
|
||||
|
||||
@@ -26,13 +26,13 @@ class AzureOpenAIEmbedding(EmbeddingBase):
|
||||
default_headers=default_headers,
|
||||
)
|
||||
|
||||
def embed(self, text):
|
||||
def embed(self, text, memory_action:Optional[Literal["add", "search", "update"]] = None):
|
||||
"""
|
||||
Get the embedding for the given text using Azure OpenAI.
|
||||
|
||||
Args:
|
||||
text (str): The text to embed.
|
||||
|
||||
memory_action (optional): The type of embedding to use. Must be one of "add", "search", or "update". Defaults to None.
|
||||
Returns:
|
||||
list: The embedding vector.
|
||||
"""
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Optional
|
||||
from typing import Literal, Optional
|
||||
|
||||
from mem0.configs.embeddings.base import BaseEmbedderConfig
|
||||
|
||||
@@ -18,13 +18,13 @@ class EmbeddingBase(ABC):
|
||||
self.config = config
|
||||
|
||||
@abstractmethod
|
||||
def embed(self, text):
|
||||
def embed(self, text, memory_action:Optional[Literal["add", "search", "update"]]):
|
||||
"""
|
||||
Get the embedding for the given text.
|
||||
|
||||
Args:
|
||||
text (str): The text to embed.
|
||||
|
||||
memory_action (optional): The memory action the text is being embedded for. Must be one of "add", "search", or "update". This abstract signature declares no default; concrete embedders default it to None.
|
||||
Returns:
|
||||
list: The embedding vector.
|
||||
"""
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import os
|
||||
from typing import Optional
|
||||
from typing import Literal, Optional
|
||||
|
||||
import google.generativeai as genai
|
||||
|
||||
@@ -18,11 +18,12 @@ class GoogleGenAIEmbedding(EmbeddingBase):
|
||||
|
||||
genai.configure(api_key=api_key)
|
||||
|
||||
def embed(self, text):
|
||||
def embed(self, text, memory_action:Optional[Literal["add", "search", "update"]] = None):
|
||||
"""
|
||||
Get the embedding for the given text using Google Generative AI.
|
||||
Args:
|
||||
text (str): The text to embed.
|
||||
memory_action (optional): The type of embedding to use. Must be one of "add", "search", or "update". Defaults to None.
|
||||
Returns:
|
||||
list: The embedding vector.
|
||||
"""
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from typing import Optional
|
||||
from typing import Literal, Optional
|
||||
|
||||
from sentence_transformers import SentenceTransformer
|
||||
|
||||
@@ -16,13 +16,13 @@ class HuggingFaceEmbedding(EmbeddingBase):
|
||||
|
||||
self.config.embedding_dims = self.config.embedding_dims or self.model.get_sentence_embedding_dimension()
|
||||
|
||||
def embed(self, text):
|
||||
def embed(self, text, memory_action:Optional[Literal["add", "search", "update"]] = None):
|
||||
"""
|
||||
Get the embedding for the given text using Hugging Face.
|
||||
|
||||
Args:
|
||||
text (str): The text to embed.
|
||||
|
||||
memory_action (optional): The type of embedding to use. Must be one of "add", "search", or "update". Defaults to None.
|
||||
Returns:
|
||||
list: The embedding vector.
|
||||
"""
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import subprocess
|
||||
import sys
|
||||
from typing import Optional
|
||||
from typing import Literal, Optional
|
||||
|
||||
from mem0.configs.embeddings.base import BaseEmbedderConfig
|
||||
from mem0.embeddings.base import EmbeddingBase
|
||||
@@ -39,13 +39,13 @@ class OllamaEmbedding(EmbeddingBase):
|
||||
if not any(model.get("name") == self.config.model for model in local_models):
|
||||
self.client.pull(self.config.model)
|
||||
|
||||
def embed(self, text):
|
||||
def embed(self, text, memory_action:Optional[Literal["add", "search", "update"]] = None):
|
||||
"""
|
||||
Get the embedding for the given text using Ollama.
|
||||
|
||||
Args:
|
||||
text (str): The text to embed.
|
||||
|
||||
memory_action (optional): The type of embedding to use. Must be one of "add", "search", or "update". Defaults to None.
|
||||
Returns:
|
||||
list: The embedding vector.
|
||||
"""
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import os
|
||||
from typing import Optional
|
||||
from typing import Literal, Optional
|
||||
|
||||
from openai import OpenAI
|
||||
|
||||
@@ -18,13 +18,13 @@ class OpenAIEmbedding(EmbeddingBase):
|
||||
base_url = self.config.openai_base_url or os.getenv("OPENAI_API_BASE")
|
||||
self.client = OpenAI(api_key=api_key, base_url=base_url)
|
||||
|
||||
def embed(self, text):
|
||||
def embed(self, text, memory_action:Optional[Literal["add", "search", "update"]] = None):
|
||||
"""
|
||||
Get the embedding for the given text using OpenAI.
|
||||
|
||||
Args:
|
||||
text (str): The text to embed.
|
||||
|
||||
memory_action (optional): The type of embedding to use. Must be one of "add", "search", or "update". Defaults to None.
|
||||
Returns:
|
||||
list: The embedding vector.
|
||||
"""
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import os
|
||||
from typing import Optional
|
||||
from typing import Literal, Optional
|
||||
|
||||
from together import Together
|
||||
|
||||
@@ -17,13 +17,13 @@ class TogetherEmbedding(EmbeddingBase):
|
||||
self.config.embedding_dims = self.config.embedding_dims or 768
|
||||
self.client = Together(api_key=api_key)
|
||||
|
||||
def embed(self, text):
|
||||
def embed(self, text, memory_action:Optional[Literal["add", "search", "update"]] = None):
|
||||
"""
|
||||
Get the embedding for the given text using Together.
|
||||
|
||||
Args:
|
||||
text (str): The text to embed.
|
||||
|
||||
memory_action (optional): The type of embedding to use. Must be one of "add", "search", or "update". Defaults to None.
|
||||
Returns:
|
||||
list: The embedding vector.
|
||||
"""
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import os
|
||||
from typing import Optional
|
||||
from typing import Literal, Optional
|
||||
|
||||
from vertexai.language_models import TextEmbeddingModel
|
||||
from vertexai.language_models import TextEmbeddingInput, TextEmbeddingModel
|
||||
|
||||
from mem0.configs.embeddings.base import BaseEmbedderConfig
|
||||
from mem0.embeddings.base import EmbeddingBase
|
||||
@@ -13,7 +13,13 @@ class VertexAIEmbedding(EmbeddingBase):
|
||||
|
||||
self.config.model = self.config.model or "text-embedding-004"
|
||||
self.config.embedding_dims = self.config.embedding_dims or 256
|
||||
|
||||
|
||||
self.embedding_types = {
|
||||
"add": self.config.memory_add_embedding_type or "RETRIEVAL_DOCUMENT",
|
||||
"update": self.config.memory_update_embedding_type or "RETRIEVAL_DOCUMENT",
|
||||
"search": self.config.memory_search_embedding_type or "RETRIEVAL_QUERY"
|
||||
}
|
||||
|
||||
credentials_path = self.config.vertex_credentials_json
|
||||
|
||||
if credentials_path:
|
||||
@@ -25,16 +31,24 @@ class VertexAIEmbedding(EmbeddingBase):
|
||||
|
||||
self.model = TextEmbeddingModel.from_pretrained(self.config.model)
|
||||
|
||||
def embed(self, text):
|
||||
def embed(self, text, memory_action:Optional[Literal["add", "search", "update"]] = None):
|
||||
"""
|
||||
Get the embedding for the given text using Vertex AI.
|
||||
|
||||
Args:
|
||||
text (str): The text to embed.
|
||||
|
||||
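memory_action (optional): The memory action the text is being embedded for; it selects the configured Vertex AI task type. Must be one of "add", "search", or "update" (any other value raises ValueError). Defaults to None, which uses the "SEMANTIC_SIMILARITY" task type.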
|
||||
Returns:
|
||||
list: The embedding vector.
|
||||
"""
|
||||
embeddings = self.model.get_embeddings(texts=[text], output_dimensionality=self.config.embedding_dims)
|
||||
embedding_type = "SEMANTIC_SIMILARITY"
|
||||
if memory_action is not None:
|
||||
if memory_action not in self.embedding_types:
|
||||
raise ValueError(f"Invalid memory action: {memory_action}")
|
||||
|
||||
embedding_type = self.embedding_types[memory_action]
|
||||
|
||||
text_input = TextEmbeddingInput(text=text, task_type=embedding_type)
|
||||
embeddings = self.model.get_embeddings(texts=[text_input], output_dimensionality=self.config.embedding_dims)
|
||||
|
||||
return embeddings[0].values
|
||||
|
||||
@@ -9,7 +9,7 @@ from typing import Any, Dict
|
||||
|
||||
import pytz
|
||||
from pydantic import ValidationError
|
||||
from mem0.memory.utils import parse_vision_messages
|
||||
|
||||
from mem0.configs.base import MemoryConfig, MemoryItem
|
||||
from mem0.configs.prompts import get_update_memory_messages
|
||||
from mem0.memory.base import MemoryBase
|
||||
@@ -19,6 +19,7 @@ from mem0.memory.telemetry import capture_event
|
||||
from mem0.memory.utils import (
|
||||
get_fact_retrieval_messages,
|
||||
parse_messages,
|
||||
parse_vision_messages,
|
||||
remove_code_blocks,
|
||||
)
|
||||
from mem0.utils.factory import EmbedderFactory, LlmFactory, VectorStoreFactory
|
||||
@@ -167,7 +168,7 @@ class Memory(MemoryBase):
|
||||
retrieved_old_memory = []
|
||||
new_message_embeddings = {}
|
||||
for new_mem in new_retrieved_facts:
|
||||
messages_embeddings = self.embedding_model.embed(new_mem)
|
||||
messages_embeddings = self.embedding_model.embed(new_mem, "add")
|
||||
new_message_embeddings[new_mem] = messages_embeddings
|
||||
existing_memories = self.vector_store.search(
|
||||
query=messages_embeddings,
|
||||
@@ -446,7 +447,7 @@ class Memory(MemoryBase):
|
||||
return original_memories
|
||||
|
||||
def _search_vector_store(self, query, filters, limit):
|
||||
embeddings = self.embedding_model.embed(query)
|
||||
embeddings = self.embedding_model.embed(query, "search")
|
||||
memories = self.vector_store.search(query=embeddings, limit=limit, filters=filters)
|
||||
|
||||
excluded_keys = {
|
||||
@@ -494,7 +495,7 @@ class Memory(MemoryBase):
|
||||
"""
|
||||
capture_event("mem0.update", self, {"memory_id": memory_id})
|
||||
|
||||
existing_embeddings = {data: self.embedding_model.embed(data)}
|
||||
existing_embeddings = {data: self.embedding_model.embed(data, "update")}
|
||||
|
||||
self._update_memory(memory_id, data, existing_embeddings)
|
||||
return {"message": "Memory updated successfully!"}
|
||||
@@ -562,7 +563,7 @@ class Memory(MemoryBase):
|
||||
if data in existing_embeddings:
|
||||
embeddings = existing_embeddings[data]
|
||||
else:
|
||||
embeddings = self.embedding_model.embed(data)
|
||||
embeddings = self.embedding_model.embed(data, "add")
|
||||
memory_id = str(uuid.uuid4())
|
||||
metadata = metadata or {}
|
||||
metadata["data"] = data
|
||||
@@ -603,7 +604,7 @@ class Memory(MemoryBase):
|
||||
if data in existing_embeddings:
|
||||
embeddings = existing_embeddings[data]
|
||||
else:
|
||||
embeddings = self.embedding_model.embed(data)
|
||||
embeddings = self.embedding_model.embed(data, "update")
|
||||
self.vector_store.update(
|
||||
vector_id=memory_id,
|
||||
vector=embeddings,
|
||||
|
||||
Reference in New Issue
Block a user