From c81e2efbb0d54f682e879ad0cd3c21c84ddcb7bd Mon Sep 17 00:00:00 2001 From: Dev Khant Date: Mon, 5 May 2025 11:20:34 +0530 Subject: [PATCH] Support for HF Inference (#2619) --- .../embedders/models/huggingface.mdx | 37 ++++++++++++++++++- mem0/configs/embeddings/base.py | 5 ++- mem0/embeddings/huggingface.py | 15 ++++++-- pyproject.toml | 2 +- 4 files changed, 51 insertions(+), 8 deletions(-) diff --git a/docs/components/embedders/models/huggingface.mdx b/docs/components/embedders/models/huggingface.mdx index b8b00835..1e9f5304 100644 --- a/docs/components/embedders/models/huggingface.mdx +++ b/docs/components/embedders/models/huggingface.mdx @@ -25,12 +25,44 @@ m = Memory.from_config(config) messages = [ {"role": "user", "content": "I'm planning to watch a movie tonight. Any recommendations?"}, {"role": "assistant", "content": "How about a thriller movies? They can be quite engaging."}, - {"role": "user", "content": "I’m not a big fan of thriller movies but I love sci-fi movies."}, + {"role": "user", "content": "I'm not a big fan of thriller movies but I love sci-fi movies."}, {"role": "assistant", "content": "Got it! 
I'll avoid thriller recommendations and suggest sci-fi movies in the future."} ] m.add(messages, user_id="john") ``` +### Using Text Embeddings Inference (TEI) + +You can also use Hugging Face's Text Embeddings Inference service for faster and more efficient embeddings: + +```python +import os +from mem0 import Memory + +os.environ["OPENAI_API_KEY"] = "your_api_key" # For LLM + +# Using HuggingFace Text Embeddings Inference API +config = { + "embedder": { + "provider": "huggingface", + "config": { + "huggingface_base_url": "http://localhost:3000/v1" + } + } +} + +m = Memory.from_config(config) +m.add("This text will be embedded using the TEI service.", user_id="john") +``` + +To run the TEI service, you can use Docker: + +```bash +docker run -d -p 3000:80 -v huggingfacetei:/data --platform linux/amd64 \ + ghcr.io/huggingface/text-embeddings-inference:cpu-1.6 \ + --model-id BAAI/bge-small-en-v1.5 +``` + ### Config Here are the parameters available for configuring Huggingface embedder: @@ -39,4 +71,5 @@ Here are the parameters available for configuring Huggingface embedder: | --- | --- | --- | | `model` | The name of the model to use | `multi-qa-MiniLM-L6-cos-v1` | | `embedding_dims` | Dimensions of the embedding model | `selected_model_dimensions` | -| `model_kwargs` | Additional arguments for the model | `None` | \ No newline at end of file +| `model_kwargs` | Additional arguments for the model | `None` | +| `huggingface_base_url` | URL to connect to Text Embeddings Inference (TEI) API | `None` | \ No newline at end of file diff --git a/mem0/configs/embeddings/base.py b/mem0/configs/embeddings/base.py index b66b3339..23a9c6f2 100644 --- a/mem0/configs/embeddings/base.py +++ b/mem0/configs/embeddings/base.py @@ -22,6 +22,7 @@ class BaseEmbedderConfig(ABC): openai_base_url: Optional[str] = None, # Huggingface specific model_kwargs: Optional[dict] = None, + huggingface_base_url: Optional[str] = None, # AzureOpenAI specific azure_kwargs: Optional[AzureConfig] = {}, 
http_client_proxies: Optional[Union[Dict, str]] = None, @@ -46,6 +47,8 @@ class BaseEmbedderConfig(ABC): :type ollama_base_url: Optional[str], optional :param model_kwargs: key-value arguments for the huggingface embedding model, defaults a dict inside init :type model_kwargs: Optional[Dict[str, Any]], defaults a dict inside init + :param huggingface_base_url: Huggingface base URL to be used, defaults to None + :type huggingface_base_url: Optional[str], optional :param openai_base_url: Openai base URL to be use, defaults to "https://api.openai.com/v1" :type openai_base_url: Optional[str], optional :param azure_kwargs: key-value arguments for the AzureOpenAI embedding model, defaults a dict inside init @@ -77,7 +80,7 @@ class BaseEmbedderConfig(ABC): # Huggingface specific self.model_kwargs = model_kwargs or {} - + self.huggingface_base_url = huggingface_base_url # AzureOpenAI specific self.azure_kwargs = AzureConfig(**azure_kwargs) or {} diff --git a/mem0/embeddings/huggingface.py b/mem0/embeddings/huggingface.py index 6e77d6b3..334837ed 100644 --- a/mem0/embeddings/huggingface.py +++ b/mem0/embeddings/huggingface.py @@ -5,6 +5,7 @@ logging.getLogger("transformers").setLevel(logging.WARNING) logging.getLogger("sentence_transformers").setLevel(logging.WARNING) logging.getLogger("huggingface_hub").setLevel(logging.WARNING) +from openai import OpenAI from sentence_transformers import SentenceTransformer from mem0.configs.embeddings.base import BaseEmbedderConfig @@ -15,11 +16,14 @@ class HuggingFaceEmbedding(EmbeddingBase): def __init__(self, config: Optional[BaseEmbedderConfig] = None): super().__init__(config) - self.config.model = self.config.model or "multi-qa-MiniLM-L6-cos-v1" + if config.huggingface_base_url: + self.client = OpenAI(base_url=config.huggingface_base_url) + else: + self.config.model = self.config.model or "multi-qa-MiniLM-L6-cos-v1" - self.model = 
SentenceTransformer(self.config.model, **self.config.model_kwargs) - self.config.embedding_dims = self.config.embedding_dims or self.model.get_sentence_embedding_dimension() + self.config.embedding_dims = self.config.embedding_dims or self.model.get_sentence_embedding_dimension() def embed(self, text, memory_action: Optional[Literal["add", "search", "update"]] = None): """ @@ -31,4 +35,7 @@ class HuggingFaceEmbedding(EmbeddingBase): Returns: list: The embedding vector. """ - return self.model.encode(text, convert_to_numpy=True).tolist() + if self.config.huggingface_base_url: + return self.client.embeddings.create(input=text, model="tei").data[0].embedding + else: + return self.model.encode(text, convert_to_numpy=True).tolist() diff --git a/pyproject.toml b/pyproject.toml index 2ce0b7af..884e2c15 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "mem0ai" -version = "0.1.96" +version = "0.1.97" description = "Long-term memory for AI Agents" authors = ["Mem0 "] exclude = [