diff --git a/docs/components/embedding-models.mdx b/docs/components/embedding-models.mdx index 49efa4ac..4e362fb3 100644 --- a/docs/components/embedding-models.mdx +++ b/docs/components/embedding-models.mdx @@ -14,6 +14,7 @@ Embedchain supports several embedding models from the following providers: + ## OpenAI @@ -273,3 +274,87 @@ answer = app.query("What is the net worth of Elon Musk today?") # Additionally, his net worth may include other assets such as real estate and art, which are not reflected in his stock portfolio. ``` + + +## Cohere + +To use embedding models and LLMs from Cohere, create an account on [Cohere](https://dashboard.cohere.com/welcome/login?redirect_uri=%2Fapi-keys). + +Generate an API key from their dashboard. Set the API key as the `COHERE_API_KEY` environment variable. + +Once you have obtained the key, you can use it like this: + + + +```python main.py +import os +from embedchain import App + +os.environ['COHERE_API_KEY'] = 'xxx' + +# load embedding model configuration from config.yaml file +app = App.from_config(config_path="config.yaml") +``` + +```yaml config.yaml +embedder: + provider: cohere + config: + model: 'embed-english-light-v3.0' +``` + + + +* Besides the `embed-english-light-v3.0` model used above, Cohere offers several other embedding models: `embed-english-v3.0`, `embed-multilingual-v3.0`, `embed-multilingual-light-v3.0`, `embed-english-v2.0`, `embed-english-light-v2.0` and `embed-multilingual-v2.0`. Embedchain supports all of these models. 
Below you can find YAML config for all: + + + +```yaml embed-english-v3.0.yaml +embedder: + provider: cohere + config: + model: 'embed-english-v3.0' + vector_dimension: 1024 +``` + +```yaml embed-multilingual-v3.0.yaml +embedder: + provider: cohere + config: + model: 'embed-multilingual-v3.0' + vector_dimension: 1024 +``` + +```yaml embed-multilingual-light-v3.0.yaml +embedder: + provider: cohere + config: + model: 'embed-multilingual-light-v3.0' + vector_dimension: 384 +``` + +```yaml embed-english-v2.0.yaml +embedder: + provider: cohere + config: + model: 'embed-english-v2.0' + vector_dimension: 4096 +``` + +```yaml embed-english-light-v2.0.yaml +embedder: + provider: cohere + config: + model: 'embed-english-light-v2.0' + vector_dimension: 1024 +``` + +```yaml embed-multilingual-v2.0.yaml +embedder: + provider: cohere + config: + model: 'embed-multilingual-v2.0' + vector_dimension: 768 +``` + + \ No newline at end of file diff --git a/embedchain/embedder/cohere.py b/embedchain/embedder/cohere.py new file mode 100644 index 00000000..1431bfee --- /dev/null +++ b/embedchain/embedder/cohere.py @@ -0,0 +1,19 @@ +from typing import Optional + +from langchain_community.embeddings import CohereEmbeddings + +from embedchain.config import BaseEmbedderConfig +from embedchain.embedder.base import BaseEmbedder +from embedchain.models import VectorDimensions + + +class CohereEmbedder(BaseEmbedder): + def __init__(self, config: Optional[BaseEmbedderConfig] = None): + super().__init__(config=config) + + embeddings = CohereEmbeddings(model=self.config.model) + embedding_fn = BaseEmbedder._langchain_default_concept(embeddings) + self.set_embedding_fn(embedding_fn=embedding_fn) + + vector_dimension = self.config.vector_dimension or VectorDimensions.COHERE.value + self.set_vector_dimension(vector_dimension=vector_dimension) diff --git a/embedchain/factory.py b/embedchain/factory.py index f2862112..9c0dea3e 100644 --- a/embedchain/factory.py +++ b/embedchain/factory.py @@ -56,6 +56,7 
@@ class EmbedderFactory: "google": "embedchain.embedder.google.GoogleAIEmbedder", "mistralai": "embedchain.embedder.mistralai.MistralAIEmbedder", "nvidia": "embedchain.embedder.nvidia.NvidiaEmbedder", + "cohere": "embedchain.embedder.cohere.CohereEmbedder", } provider_to_config_class = { "azure_openai": "embedchain.config.embedder.base.BaseEmbedderConfig", diff --git a/embedchain/models/vector_dimensions.py b/embedchain/models/vector_dimensions.py index 23bedc3d..1e0c740a 100644 --- a/embedchain/models/vector_dimensions.py +++ b/embedchain/models/vector_dimensions.py @@ -10,3 +10,4 @@ class VectorDimensions(Enum): GOOGLE_AI = 768 MISTRAL_AI = 1024 NVIDIA_AI = 1024 + COHERE = 384