From c81e2efbb0d54f682e879ad0cd3c21c84ddcb7bd Mon Sep 17 00:00:00 2001
From: Dev Khant <devkhant24@gmail.com>
Date: Mon, 5 May 2025 11:20:34 +0530
Subject: [PATCH] Support for HF Inference (#2619)

---
 .../embedders/models/huggingface.mdx          | 37 ++++++++++++++++++-
 mem0/configs/embeddings/base.py               |  5 ++-
 mem0/embeddings/huggingface.py                | 15 ++++++--
 pyproject.toml                                |  2 +-
 4 files changed, 51 insertions(+), 8 deletions(-)

diff --git a/docs/components/embedders/models/huggingface.mdx b/docs/components/embedders/models/huggingface.mdx
index b8b00835..1e9f5304 100644
--- a/docs/components/embedders/models/huggingface.mdx
+++ b/docs/components/embedders/models/huggingface.mdx
@@ -25,12 +25,44 @@ m = Memory.from_config(config)
 messages = [
     {"role": "user", "content": "I'm planning to watch a movie tonight. Any recommendations?"},
     {"role": "assistant", "content": "How about a thriller movies? They can be quite engaging."},
-    {"role": "user", "content": "I’m not a big fan of thriller movies but I love sci-fi movies."},
+    {"role": "user", "content": "I'm not a big fan of thriller movies but I love sci-fi movies."},
     {"role": "assistant", "content": "Got it! I'll avoid thriller recommendations and suggest sci-fi movies in the future."}
 ]
 m.add(messages, user_id="john")
 ```
 
+### Using Text Embeddings Inference (TEI)
+
+You can also use Hugging Face's Text Embeddings Inference service for faster and more efficient embeddings:
+
+```python
+import os
+from mem0 import Memory
+
+os.environ["OPENAI_API_KEY"] = "your_api_key" # For LLM
+
+# Using HuggingFace Text Embeddings Inference API
+config = {
+    "embedder": {
+        "provider": "huggingface",
+        "config": {
+            "huggingface_base_url": "http://localhost:3000/v1"
+        }
+    }
+}
+
+m = Memory.from_config(config)
+m.add("This text will be embedded using the TEI service.", user_id="john")
+```
+
+To run the TEI service, you can use Docker:
+
+```bash
+docker run -d -p 3000:80 -v huggingfacetei:/data --platform linux/amd64 \
+    ghcr.io/huggingface/text-embeddings-inference:cpu-1.6 \
+    --model-id BAAI/bge-small-en-v1.5
+```
+
 ### Config
 
 Here are the parameters available for configuring Huggingface embedder:
@@ -39,4 +71,5 @@ Here are the parameters available for configuring Huggingface embedder:
 | --- | --- | --- |
 | `model` | The name of the model to use | `multi-qa-MiniLM-L6-cos-v1` |
 | `embedding_dims` | Dimensions of the embedding model | `selected_model_dimensions` |
-| `model_kwargs` | Additional arguments for the model | `None` |
\ No newline at end of file
+| `model_kwargs` | Additional arguments for the model | `None` |
+| `huggingface_base_url` | Base URL of the Text Embeddings Inference (TEI) server's OpenAI-compatible API, e.g. `http://localhost:3000/v1` | `None` |
\ No newline at end of file
diff --git a/mem0/configs/embeddings/base.py b/mem0/configs/embeddings/base.py
index b66b3339..23a9c6f2 100644
--- a/mem0/configs/embeddings/base.py
+++ b/mem0/configs/embeddings/base.py
@@ -22,6 +22,7 @@ class BaseEmbedderConfig(ABC):
         openai_base_url: Optional[str] = None,
         # Huggingface specific
         model_kwargs: Optional[dict] = None,
+        huggingface_base_url: Optional[str] = None,
         # AzureOpenAI specific
         azure_kwargs: Optional[AzureConfig] = {},
         http_client_proxies: Optional[Union[Dict, str]] = None,
@@ -46,6 +47,8 @@ class BaseEmbedderConfig(ABC):
         :type ollama_base_url: Optional[str], optional
         :param model_kwargs: key-value arguments for the huggingface embedding model, defaults a dict inside init
         :type model_kwargs: Optional[Dict[str, Any]], defaults a dict inside init
+        :param huggingface_base_url: Base URL of a Hugging Face Text Embeddings Inference (TEI) server to use, defaults to None
+        :type huggingface_base_url: Optional[str], optional
         :param openai_base_url: Openai base URL to be use, defaults to "https://api.openai.com/v1"
         :type openai_base_url: Optional[str], optional
         :param azure_kwargs: key-value arguments for the AzureOpenAI embedding model, defaults a dict inside init
@@ -77,7 +80,7 @@ class BaseEmbedderConfig(ABC):
 
         # Huggingface specific
         self.model_kwargs = model_kwargs or {}
-
+        self.huggingface_base_url = huggingface_base_url
         # AzureOpenAI specific
         self.azure_kwargs = AzureConfig(**azure_kwargs) or {}
 
diff --git a/mem0/embeddings/huggingface.py b/mem0/embeddings/huggingface.py
index 6e77d6b3..334837ed 100644
--- a/mem0/embeddings/huggingface.py
+++ b/mem0/embeddings/huggingface.py
@@ -5,6 +5,7 @@ logging.getLogger("transformers").setLevel(logging.WARNING)
 logging.getLogger("sentence_transformers").setLevel(logging.WARNING)
 logging.getLogger("huggingface_hub").setLevel(logging.WARNING)
 
+from openai import OpenAI
 from sentence_transformers import SentenceTransformer
 
 from mem0.configs.embeddings.base import BaseEmbedderConfig
@@ -15,11 +16,14 @@ class HuggingFaceEmbedding(EmbeddingBase):
     def __init__(self, config: Optional[BaseEmbedderConfig] = None):
         super().__init__(config)
 
-        self.config.model = self.config.model or "multi-qa-MiniLM-L6-cos-v1"
+        if self.config.huggingface_base_url:  # use self.config: the `config` argument may be None
+            self.client = OpenAI(base_url=self.config.huggingface_base_url)
+        else:  # no TEI endpoint configured: embed locally with SentenceTransformer
+            self.config.model = self.config.model or "multi-qa-MiniLM-L6-cos-v1"
 
-        self.model = SentenceTransformer(self.config.model, **self.config.model_kwargs)
+            self.model = SentenceTransformer(self.config.model, **self.config.model_kwargs)
 
-        self.config.embedding_dims = self.config.embedding_dims or self.model.get_sentence_embedding_dimension()
+            self.config.embedding_dims = self.config.embedding_dims or self.model.get_sentence_embedding_dimension()
 
     def embed(self, text, memory_action: Optional[Literal["add", "search", "update"]] = None):
         """
@@ -31,4 +35,7 @@ class HuggingFaceEmbedding(EmbeddingBase):
         Returns:
             list: The embedding vector.
         """
-        return self.model.encode(text, convert_to_numpy=True).tolist()
+        if self.config.huggingface_base_url:
+            return self.client.embeddings.create(input=text, model="tei").data[0].embedding
+        else:
+            return self.model.encode(text, convert_to_numpy=True).tolist()
diff --git a/pyproject.toml b/pyproject.toml
index 2ce0b7af..884e2c15 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "mem0ai"
-version = "0.1.96"
+version = "0.1.97"
 description = "Long-term memory for AI Agents"
 authors = ["Mem0 <founders@mem0.ai>"]
 exclude = [