From 959f4bb0592002ce791a48faffc7f9c4d60229dd Mon Sep 17 00:00:00 2001
From: Divyanshu Prasad <90151736+Divyanshu9822@users.noreply.github.com>
Date: Fri, 13 Sep 2024 17:09:25 +0530
Subject: [PATCH] Add Support for Vertex AI Embeddings (#1840)
---
docs/components/embedders/config.mdx | 1 +
docs/components/embedders/models/vertexai.mdx | 35 +++++++++++++++++
docs/components/embedders/overview.mdx | 1 +
mem0/embeddings/configs.py | 2 +-
mem0/embeddings/vertexai.py | 39 +++++++++++++++++++
5 files changed, 77 insertions(+), 1 deletion(-)
create mode 100644 docs/components/embedders/models/vertexai.mdx
create mode 100644 mem0/embeddings/vertexai.py
diff --git a/docs/components/embedders/config.mdx b/docs/components/embedders/config.mdx
index 91731be8..95860b80 100644
--- a/docs/components/embedders/config.mdx
+++ b/docs/components/embedders/config.mdx
@@ -53,6 +53,7 @@ Here's a comprehensive list of all parameters that can be used across different
| `model_kwargs` | Key-Value arguments for the Huggingface embedding model |
| `azure_kwargs` | Key-Value arguments for the AzureOpenAI embedding model |
| `openai_base_url` | Base URL for OpenAI API | OpenAI |
+| `vertex_credentials_json` | Path to the Google Cloud credentials JSON file for VertexAI |
## Supported Embedding Models
diff --git a/docs/components/embedders/models/vertexai.mdx b/docs/components/embedders/models/vertexai.mdx
new file mode 100644
index 00000000..1fe8b95e
--- /dev/null
+++ b/docs/components/embedders/models/vertexai.mdx
@@ -0,0 +1,35 @@
+### Vertex AI
+
+To use Google Cloud's Vertex AI text embedding models, either set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to the path of your service account's credentials JSON file, or pass that path as `vertex_credentials_json` in the embedder config. These credentials can be created in the [Google Cloud Console](https://console.cloud.google.com/).
+
+### Usage
+
+```python
+import os
+from mem0 import Memory
+
+# Set the path to your Google Cloud credentials JSON file
+os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/your/credentials.json"
+
+config = {
+ "embedder": {
+ "provider": "vertexai",
+ "config": {
+ "model": "text-embedding-004"
+ }
+ }
+}
+
+m = Memory.from_config(config)
+m.add("I'm visiting Paris", user_id="john")
+```
+
+### Config
+
+Here are the parameters available for configuring the Vertex AI embedder:
+
+| Parameter | Description | Default Value |
+| ------------------------- | ------------------------------------------------ | -------------------- |
+| `model` | The name of the Vertex AI embedding model to use | `text-embedding-004` |
+| `vertex_credentials_json` | Path to the Google Cloud credentials JSON file | `None` |
+| `embedding_dims` | Dimensions of the embedding model | `256` |
diff --git a/docs/components/embedders/overview.mdx b/docs/components/embedders/overview.mdx
index 2cd78c12..f1d7d7e5 100644
--- a/docs/components/embedders/overview.mdx
+++ b/docs/components/embedders/overview.mdx
@@ -13,6 +13,7 @@ See the list of supported embedders below.
+
## Usage
diff --git a/mem0/embeddings/configs.py b/mem0/embeddings/configs.py
index 73aa9b30..9e3848cf 100644
--- a/mem0/embeddings/configs.py
+++ b/mem0/embeddings/configs.py
@@ -15,7 +15,7 @@ class EmbedderConfig(BaseModel):
@field_validator("config")
def validate_config(cls, v, values):
provider = values.data.get("provider")
- if provider in ["openai", "ollama", "huggingface", "azure_openai"]:
+ if provider in ["openai", "ollama", "huggingface", "azure_openai", "vertexai"]:
return v
else:
raise ValueError(f"Unsupported embedding provider: {provider}")
diff --git a/mem0/embeddings/vertexai.py b/mem0/embeddings/vertexai.py
new file mode 100644
index 00000000..4839a2f3
--- /dev/null
+++ b/mem0/embeddings/vertexai.py
@@ -0,0 +1,39 @@
+import os
+from typing import Optional
+
+from vertexai.language_models import TextEmbeddingModel
+
+from mem0.configs.embeddings.base import BaseEmbedderConfig
+from mem0.embeddings.base import EmbeddingBase
+
+class VertexAI(EmbeddingBase):
+    """Embedder that delegates to a Vertex AI ``TextEmbeddingModel``."""
+
+    def __init__(self, config: Optional[BaseEmbedderConfig] = None):
+        super().__init__(config)
+        cfg = self.config
+        cfg.model = cfg.model or "text-embedding-004"
+        cfg.embedding_dims = cfg.embedding_dims or 256
+
+        # A path given in the config takes precedence over the environment.
+        env_key = "GOOGLE_APPLICATION_CREDENTIALS"
+        json_path = cfg.vertex_credentials_json
+        if json_path:
+            os.environ[env_key] = json_path
+        elif not os.getenv(env_key):
+            raise ValueError(
+                "Google application credentials JSON is not provided. Please provide a valid JSON path or set the 'GOOGLE_APPLICATION_CREDENTIALS' environment variable."
+            )
+        self.model = TextEmbeddingModel.from_pretrained(cfg.model)
+
+    def embed(self, text):
+        """Embed a single text with the loaded Vertex AI model.
+
+        Args:
+            text (str): The text to embed.
+
+        Returns:
+            list: The embedding vector.
+        """
+        vectors = self.model.get_embeddings(texts=[text], output_dimensionality=self.config.embedding_dims)
+        return vectors[0].values