From 959f4bb0592002ce791a48faffc7f9c4d60229dd Mon Sep 17 00:00:00 2001 From: Divyanshu Prasad <90151736+Divyanshu9822@users.noreply.github.com> Date: Fri, 13 Sep 2024 17:09:25 +0530 Subject: [PATCH] Add Support for Vertex AI Embeddings (#1840) --- docs/components/embedders/config.mdx | 1 + docs/components/embedders/models/vertexai.mdx | 35 +++++++++++++++++ docs/components/embedders/overview.mdx | 1 + mem0/embeddings/configs.py | 2 +- mem0/embeddings/vertexai.py | 39 +++++++++++++++++++ 5 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 docs/components/embedders/models/vertexai.mdx create mode 100644 mem0/embeddings/vertexai.py diff --git a/docs/components/embedders/config.mdx b/docs/components/embedders/config.mdx index 91731be8..95860b80 100644 --- a/docs/components/embedders/config.mdx +++ b/docs/components/embedders/config.mdx @@ -53,6 +53,7 @@ Here's a comprehensive list of all parameters that can be used across different | `model_kwargs` | Key-Value arguments for the Huggingface embedding model | | `azure_kwargs` | Key-Value arguments for the AzureOpenAI embedding model | | `openai_base_url` | Base URL for OpenAI API | OpenAI | +| `vertex_credentials_json` | Path to the Google Cloud credentials JSON file for VertexAI | ## Supported Embedding Models diff --git a/docs/components/embedders/models/vertexai.mdx b/docs/components/embedders/models/vertexai.mdx new file mode 100644 index 00000000..1fe8b95e --- /dev/null +++ b/docs/components/embedders/models/vertexai.mdx @@ -0,0 +1,35 @@ +### Vertex AI + +To use Google Cloud's Vertex AI for text embedding models, set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to point to the path of your service account's credentials JSON file. These credentials can be created in the [Google Cloud Console](https://console.cloud.google.com/). 
+ +### Usage + +```python +import os +from mem0 import Memory + +# Set the path to your Google Cloud credentials JSON file +os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/your/credentials.json" + +config = { + "embedder": { + "provider": "vertexai", + "config": { + "model": "text-embedding-004" + } + } +} + +m = Memory.from_config(config) +m.add("I'm visiting Paris", user_id="john") +``` + +### Config + +Here are the parameters available for configuring the Vertex AI embedder: + +| Parameter | Description | Default Value | +| ------------------------- | ------------------------------------------------ | -------------------- | +| `model` | The name of the Vertex AI embedding model to use | `text-embedding-004` | +| `vertex_credentials_json` | Path to the Google Cloud credentials JSON file | `None` | +| `embedding_dims` | Dimensions of the embedding model | `256` | diff --git a/docs/components/embedders/overview.mdx b/docs/components/embedders/overview.mdx index 2cd78c12..f1d7d7e5 100644 --- a/docs/components/embedders/overview.mdx +++ b/docs/components/embedders/overview.mdx @@ -13,6 +13,7 @@ See the list of supported embedders below. 
import os
from typing import Optional

from vertexai.language_models import TextEmbeddingModel

from mem0.configs.embeddings.base import BaseEmbedderConfig
from mem0.embeddings.base import EmbeddingBase


class VertexAI(EmbeddingBase):
    """Embedder that wraps a `TextEmbeddingModel` from `vertexai.language_models`."""

    def __init__(self, config: Optional[BaseEmbedderConfig] = None):
        """
        Fill in config defaults, resolve credentials and load the model.

        Args:
            config (Optional[BaseEmbedderConfig]): Embedder configuration,
                forwarded unchanged to the base class initializer.

        Raises:
            ValueError: If `vertex_credentials_json` is not set on the config
                and the 'GOOGLE_APPLICATION_CREDENTIALS' environment variable
                is missing or empty.
        """
        super().__init__(config)

        settings = self.config
        if not settings.model:
            settings.model = "text-embedding-004"
        if not settings.embedding_dims:
            settings.embedding_dims = 256

        env_var = "GOOGLE_APPLICATION_CREDENTIALS"
        key_file = settings.vertex_credentials_json

        if key_file:
            # An explicitly configured path replaces whatever the process
            # environment currently holds for this variable.
            os.environ[env_var] = key_file
        elif not os.environ.get(env_var):
            raise ValueError(
                "Google application credentials JSON is not provided. Please provide a valid JSON path or set the 'GOOGLE_APPLICATION_CREDENTIALS' environment variable."
            )

        self.model = TextEmbeddingModel.from_pretrained(settings.model)

    def embed(self, text):
        """
        Embed a single piece of text with the loaded Vertex AI model.

        Args:
            text (str): The text to embed.

        Returns:
            list: The `values` of the first (and only requested) embedding,
                i.e. the embedding vector.
        """
        # The API takes a batch, so the single input is wrapped in a list and
        # the one result is unwrapped again below.
        response = self.model.get_embeddings(
            texts=[text],
            output_dimensionality=self.config.embedding_dims,
        )
        first_embedding = response[0]
        return first_embedding.values