diff --git a/embedchain/vectordb/chroma.py b/embedchain/vectordb/chroma.py index fa615a41..1dfadd9b 100644 --- a/embedchain/vectordb/chroma.py +++ b/embedchain/vectordb/chroma.py @@ -126,12 +126,16 @@ class ChromaDB(BaseVectorDB): """ Add vectors to chroma database + :param embeddings: list of embeddings to add + :type embeddings: List[List[str]] :param documents: Documents :type documents: List[str] :param metadatas: Metadatas :type metadatas: List[object] :param ids: ids :type ids: List[str] + :param skip_embedding: Optional. If True, then the embeddings are assumed to be already generated. + :type skip_embedding: bool """ if skip_embedding: self.collection.add(embeddings=embeddings, documents=documents, metadatas=metadatas, ids=ids) @@ -158,7 +162,7 @@ class ChromaDB(BaseVectorDB): def query(self, input_query: List[str], n_results: int, where: Dict[str, any], skip_embedding: bool) -> List[str]: """ - Query contents from vector data base based on vector similarity + Query contents from vector database based on vector similarity :param input_query: list of query string :type input_query: List[str] @@ -166,6 +170,8 @@ class ChromaDB(BaseVectorDB): :type n_results: int :param where: to filter data :type where: Dict[str, Any] + :param skip_embedding: Optional. If True, then the input_query is assumed to be already embedded. + :type skip_embedding: bool :raises InvalidDimensionException: Dimensions do not match. :return: The content of the document that matched your query. 
:rtype: List[str] diff --git a/embedchain/vectordb/elasticsearch.py b/embedchain/vectordb/elasticsearch.py index 48a6d44d..02b45ae5 100644 --- a/embedchain/vectordb/elasticsearch.py +++ b/embedchain/vectordb/elasticsearch.py @@ -110,12 +110,16 @@ class ElasticsearchDB(BaseVectorDB): ) -> Any: """ add data in vector database + :param embeddings: list of embeddings to add + :type embeddings: List[List[str]] :param documents: list of texts to add :type documents: List[str] :param metadatas: list of metadata associated with docs :type metadatas: List[object] :param ids: ids of docs :type ids: List[str] + :param skip_embedding: Optional. If True, then the embeddings are assumed to be already generated. + :type skip_embedding: bool """ docs = [] @@ -143,6 +147,8 @@ class ElasticsearchDB(BaseVectorDB): :type n_results: int :param where: Optional. to filter data :type where: Dict[str, any] + :param skip_embedding: Optional. If True, then the input_query is assumed to be already embedded. + :type skip_embedding: bool :return: Database contents that are the result of the query :rtype: List[str] """ diff --git a/embedchain/vectordb/opensearch.py b/embedchain/vectordb/opensearch.py index b3f1983d..edfa69f8 100644 --- a/embedchain/vectordb/opensearch.py +++ b/embedchain/vectordb/opensearch.py @@ -110,12 +110,12 @@ class OpenSearchDB(BaseVectorDB): return result def add( - self, embeddings: List[str], documents: List[str], metadatas: List[object], ids: List[str], skip_embedding: bool - ): + self, embeddings: List[List[str]], documents: List[str], metadatas: List[object], ids: List[str], + skip_embedding: bool): """add data in vector database :param embeddings: list of embeddings to add - :type embeddings: List[str] + :type embeddings: List[List[str]] :param documents: list of texts to add :type documents: List[str] :param metadatas: list of metadata associated with docs @@ -127,8 +127,8 @@ class OpenSearchDB(BaseVectorDB): """ docs = [] - # TODO(rupeshbansal, deshraj): Add
support for skip embeddings here if already exists - embeddings = self.embedder.embedding_fn(documents) + if not skip_embedding: + embeddings = self.embedder.embedding_fn(documents) for id, text, metadata, embeddings in zip(ids, documents, metadatas, embeddings): docs.append( {