[OpenSearch]: Fix add() and query() for opensearch db (#764)

This commit is contained in:
Deshraj Yadav
2023-10-04 12:53:07 -07:00
committed by GitHub
parent 87d0b5c76f
commit 352e71461d
2 changed files with 13 additions and 3 deletions

View File

@@ -99,18 +99,25 @@ class OpenSearchDB(BaseVectorDB):
ids = [doc["_id"] for doc in docs] ids = [doc["_id"] for doc in docs]
return {"ids": set(ids)} return {"ids": set(ids)}
def add(self, documents: List[str], metadatas: List[object], ids: List[str]): def add(
self, embeddings: List[str], documents: List[str], metadatas: List[object], ids: List[str], skip_embedding: bool
):
"""add data in vector database """add data in vector database
:param embeddings: list of embeddings to add
:type embeddings: List[str]
:param documents: list of texts to add :param documents: list of texts to add
:type documents: List[str] :type documents: List[str]
:param metadatas: list of metadata associated with docs :param metadatas: list of metadata associated with docs
:type metadatas: List[object] :type metadatas: List[object]
:param ids: ids of docs :param ids: ids of docs
:type ids: List[str] :type ids: List[str]
:param skip_embedding: Optional. If True, then the embeddings are assumed to be already generated.
:type skip_embedding: bool
""" """
docs = [] docs = []
# TODO(rupeshbansal, deshraj): Add support for skip embeddings here if already exists
embeddings = self.embedder.embedding_fn(documents) embeddings = self.embedder.embedding_fn(documents)
for id, text, metadata, embeddings in zip(ids, documents, metadatas, embeddings): for id, text, metadata, embeddings in zip(ids, documents, metadatas, embeddings):
docs.append( docs.append(
@@ -123,7 +130,7 @@ class OpenSearchDB(BaseVectorDB):
bulk(self.client, docs) bulk(self.client, docs)
self.client.indices.refresh(index=self._get_index()) self.client.indices.refresh(index=self._get_index())
def query(self, input_query: List[str], n_results: int, where: Dict[str, any]) -> List[str]: def query(self, input_query: List[str], n_results: int, where: Dict[str, any], skip_embedding: bool) -> List[str]:
""" """
query contents from vector data base based on vector similarity query contents from vector data base based on vector similarity
@@ -133,9 +140,12 @@ class OpenSearchDB(BaseVectorDB):
:type n_results: int :type n_results: int
:param where: Optional. to filter data :param where: Optional. to filter data
:type where: Dict[str, any] :type where: Dict[str, any]
:param skip_embedding: Optional. If True, then the input_query is assumed to be already embedded.
:type skip_embedding: bool
:return: Database contents that are the result of the query :return: Database contents that are the result of the query
:rtype: List[str] :rtype: List[str]
""" """
# TODO(rupeshbansal, deshraj): Add support for skip embeddings here if already exists
embeddings = OpenAIEmbeddings() embeddings = OpenAIEmbeddings()
docsearch = OpenSearchVectorSearch( docsearch = OpenSearchVectorSearch(
index_name=self._get_index(), index_name=self._get_index(),

View File

@@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "embedchain" name = "embedchain"
version = "0.0.65" version = "0.0.66"
description = "embedchain is a framework to easily create LLM powered bots over any dataset" description = "embedchain is a framework to easily create LLM powered bots over any dataset"
authors = ["Taranjeet Singh"] authors = ["Taranjeet Singh"]
license = "Apache License" license = "Apache License"