Add config option for vertex embedding tasks (#2266)

Wonbin Kim
2025-02-28 18:50:05 +09:00
committed by GitHub
parent 8143f86be6
commit 6acb00731d
14 changed files with 141 additions and 48 deletions

View File

@@ -1,5 +1,5 @@
import os
from typing import Optional
from typing import Literal, Optional
from openai import AzureOpenAI
@@ -26,13 +26,13 @@ class AzureOpenAIEmbedding(EmbeddingBase):
default_headers=default_headers,
)
def embed(self, text):
def embed(self, text, memory_action: Optional[Literal["add", "search", "update"]] = None):
"""
Get the embedding for the given text using Azure OpenAI.
Args:
text (str): The text to embed.
memory_action (optional): The type of embedding to use. Must be one of "add", "search", or "update". Defaults to None.
Returns:
list: The embedding vector.
"""

View File

@@ -1,5 +1,5 @@
from abc import ABC, abstractmethod
from typing import Optional
from typing import Literal, Optional
from mem0.configs.embeddings.base import BaseEmbedderConfig
@@ -18,13 +18,13 @@ class EmbeddingBase(ABC):
self.config = config
@abstractmethod
def embed(self, text):
def embed(self, text, memory_action: Optional[Literal["add", "search", "update"]] = None):
"""
Get the embedding for the given text.
Args:
text (str): The text to embed.
memory_action (optional): The type of embedding to use. Must be one of "add", "search", or "update". Defaults to None.
Returns:
list: The embedding vector.
"""

View File

@@ -1,5 +1,5 @@
import os
from typing import Optional
from typing import Literal, Optional
import google.generativeai as genai
@@ -18,11 +18,12 @@ class GoogleGenAIEmbedding(EmbeddingBase):
genai.configure(api_key=api_key)
def embed(self, text):
def embed(self, text, memory_action: Optional[Literal["add", "search", "update"]] = None):
"""
Get the embedding for the given text using Google Generative AI.
Args:
text (str): The text to embed.
memory_action (optional): The type of embedding to use. Must be one of "add", "search", or "update". Defaults to None.
Returns:
list: The embedding vector.
"""

View File

@@ -1,4 +1,4 @@
from typing import Optional
from typing import Literal, Optional
from sentence_transformers import SentenceTransformer
@@ -16,13 +16,13 @@ class HuggingFaceEmbedding(EmbeddingBase):
self.config.embedding_dims = self.config.embedding_dims or self.model.get_sentence_embedding_dimension()
def embed(self, text):
def embed(self, text, memory_action: Optional[Literal["add", "search", "update"]] = None):
"""
Get the embedding for the given text using Hugging Face.
Args:
text (str): The text to embed.
memory_action (optional): The type of embedding to use. Must be one of "add", "search", or "update". Defaults to None.
Returns:
list: The embedding vector.
"""

View File

@@ -1,6 +1,6 @@
import subprocess
import sys
from typing import Optional
from typing import Literal, Optional
from mem0.configs.embeddings.base import BaseEmbedderConfig
from mem0.embeddings.base import EmbeddingBase
@@ -39,13 +39,13 @@ class OllamaEmbedding(EmbeddingBase):
if not any(model.get("name") == self.config.model for model in local_models):
self.client.pull(self.config.model)
def embed(self, text):
def embed(self, text, memory_action: Optional[Literal["add", "search", "update"]] = None):
"""
Get the embedding for the given text using Ollama.
Args:
text (str): The text to embed.
memory_action (optional): The type of embedding to use. Must be one of "add", "search", or "update". Defaults to None.
Returns:
list: The embedding vector.
"""

View File

@@ -1,5 +1,5 @@
import os
from typing import Optional
from typing import Literal, Optional
from openai import OpenAI
@@ -18,13 +18,13 @@ class OpenAIEmbedding(EmbeddingBase):
base_url = self.config.openai_base_url or os.getenv("OPENAI_API_BASE")
self.client = OpenAI(api_key=api_key, base_url=base_url)
def embed(self, text):
def embed(self, text, memory_action: Optional[Literal["add", "search", "update"]] = None):
"""
Get the embedding for the given text using OpenAI.
Args:
text (str): The text to embed.
memory_action (optional): The type of embedding to use. Must be one of "add", "search", or "update". Defaults to None.
Returns:
list: The embedding vector.
"""

View File

@@ -1,5 +1,5 @@
import os
from typing import Optional
from typing import Literal, Optional
from together import Together
@@ -17,13 +17,13 @@ class TogetherEmbedding(EmbeddingBase):
self.config.embedding_dims = self.config.embedding_dims or 768
self.client = Together(api_key=api_key)
def embed(self, text):
def embed(self, text, memory_action: Optional[Literal["add", "search", "update"]] = None):
"""
Get the embedding for the given text using Together.
Args:
text (str): The text to embed.
memory_action (optional): The type of embedding to use. Must be one of "add", "search", or "update". Defaults to None.
Returns:
list: The embedding vector.
"""

View File

@@ -1,7 +1,7 @@
import os
from typing import Optional
from typing import Literal, Optional
from vertexai.language_models import TextEmbeddingModel
from vertexai.language_models import TextEmbeddingInput, TextEmbeddingModel
from mem0.configs.embeddings.base import BaseEmbedderConfig
from mem0.embeddings.base import EmbeddingBase
@@ -13,7 +13,13 @@ class VertexAIEmbedding(EmbeddingBase):
self.config.model = self.config.model or "text-embedding-004"
self.config.embedding_dims = self.config.embedding_dims or 256
self.embedding_types = {
"add": self.config.memory_add_embedding_type or "RETRIEVAL_DOCUMENT",
"update": self.config.memory_update_embedding_type or "RETRIEVAL_DOCUMENT",
"search": self.config.memory_search_embedding_type or "RETRIEVAL_QUERY"
}
credentials_path = self.config.vertex_credentials_json
if credentials_path:
@@ -25,16 +31,24 @@ class VertexAIEmbedding(EmbeddingBase):
self.model = TextEmbeddingModel.from_pretrained(self.config.model)
def embed(self, text):
def embed(self, text, memory_action: Optional[Literal["add", "search", "update"]] = None):
"""
Get the embedding for the given text using Vertex AI.
Args:
text (str): The text to embed.
memory_action (optional): The type of embedding to use. Must be one of "add", "search", or "update". Defaults to None.
Returns:
list: The embedding vector.
"""
embeddings = self.model.get_embeddings(texts=[text], output_dimensionality=self.config.embedding_dims)
embedding_type = "SEMANTIC_SIMILARITY"
if memory_action is not None:
if memory_action not in self.embedding_types:
raise ValueError(f"Invalid memory action: {memory_action}")
embedding_type = self.embedding_types[memory_action]
text_input = TextEmbeddingInput(text=text, task_type=embedding_type)
embeddings = self.model.get_embeddings(texts=[text_input], output_dimensionality=self.config.embedding_dims)
return embeddings[0].values
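
Taken together, the new config options and the memory_action parameter let callers pick a Vertex AI task type per memory operation. A hedged usage sketch follows; it assumes the three new fields (memory_add_embedding_type, memory_update_embedding_type, memory_search_embedding_type) are accepted as BaseEmbedderConfig constructor arguments and that the embedder lives at mem0.embeddings.vertexai, neither of which is shown in this diff.

from mem0.configs.embeddings.base import BaseEmbedderConfig
from mem0.embeddings.vertexai import VertexAIEmbedding  # module path assumed

config = BaseEmbedderConfig(
    model="text-embedding-004",
    embedding_dims=256,
    memory_add_embedding_type="RETRIEVAL_DOCUMENT",     # assumed constructor kwargs
    memory_update_embedding_type="RETRIEVAL_DOCUMENT",  # matching the new config fields
    memory_search_embedding_type="RETRIEVAL_QUERY",
)
embedder = VertexAIEmbedding(config)

# Stored memories are embedded with the "add" task type ...
doc_vector = embedder.embed("Alice prefers dark roast coffee.", memory_action="add")
# ... and queries with the "search" task type, via the embedding_types mapping above.
query_vector = embedder.embed("What coffee does Alice like?", memory_action="search")
# Without a memory_action, the embedding falls back to SEMANTIC_SIMILARITY.
plain_vector = embedder.embed("Alice prefers dark roast coffee.")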