Added docs for skip_embedding and embeddings argument of vectordbs (#784)

This commit is contained in:
Rupesh Bansal
2023-10-10 00:31:44 +05:30
committed by GitHub
parent 702067e521
commit bc649b9a85
3 changed files with 18 additions and 6 deletions

View File

@@ -126,12 +126,16 @@ class ChromaDB(BaseVectorDB):
""" """
Add vectors to chroma database Add vectors to chroma database
:param embeddings: list of embeddings to add
:type embeddings: List[List[str]]
:param documents: Documents :param documents: Documents
:type documents: List[str] :type documents: List[str]
:param metadatas: Metadatas :param metadatas: Metadatas
:type metadatas: List[object] :type metadatas: List[object]
:param ids: ids :param ids: ids
:type ids: List[str] :type ids: List[str]
:param skip_embedding: Optional. If True, then the embeddings are assumed to be already generated.
:type skip_embedding: bool
""" """
if skip_embedding: if skip_embedding:
self.collection.add(embeddings=embeddings, documents=documents, metadatas=metadatas, ids=ids) self.collection.add(embeddings=embeddings, documents=documents, metadatas=metadatas, ids=ids)
@@ -158,7 +162,7 @@ class ChromaDB(BaseVectorDB):
def query(self, input_query: List[str], n_results: int, where: Dict[str, any], skip_embedding: bool) -> List[str]: def query(self, input_query: List[str], n_results: int, where: Dict[str, any], skip_embedding: bool) -> List[str]:
""" """
Query contents from vector data base based on vector similarity Query contents from vector database based on vector similarity
:param input_query: list of query string :param input_query: list of query string
:type input_query: List[str] :type input_query: List[str]
@@ -166,6 +170,8 @@ class ChromaDB(BaseVectorDB):
:type n_results: int :type n_results: int
:param where: to filter data :param where: to filter data
:type where: Dict[str, Any] :type where: Dict[str, Any]
:param skip_embedding: Optional. If True, then the input_query is assumed to be already embedded.
:type skip_embedding: bool
:raises InvalidDimensionException: Dimensions do not match. :raises InvalidDimensionException: Dimensions do not match.
:return: The content of the document that matched your query. :return: The content of the document that matched your query.
:rtype: List[str] :rtype: List[str]

View File

@@ -110,12 +110,16 @@ class ElasticsearchDB(BaseVectorDB):
) -> Any: ) -> Any:
""" """
add data in vector database add data in vector database
:param embeddings: list of embeddings to add
:type embeddings: List[List[str]]
:param documents: list of texts to add :param documents: list of texts to add
:type documents: List[str] :type documents: List[str]
:param metadatas: list of metadata associated with docs :param metadatas: list of metadata associated with docs
:type metadatas: List[object] :type metadatas: List[object]
:param ids: ids of docs :param ids: ids of docs
:type ids: List[str] :type ids: List[str]
:param skip_embedding: Optional. If True, then the embeddings are assumed to be already generated.
:type skip_embedding: bool
""" """
docs = [] docs = []
@@ -143,6 +147,8 @@ class ElasticsearchDB(BaseVectorDB):
:type n_results: int :type n_results: int
:param where: Optional. to filter data :param where: Optional. to filter data
:type where: Dict[str, any] :type where: Dict[str, any]
:param skip_embedding: Optional. If True, then the input_query is assumed to be already embedded.
:type skip_embedding: bool
:return: Database contents that are the result of the query :return: Database contents that are the result of the query
:rtype: List[str] :rtype: List[str]
""" """

View File

@@ -110,12 +110,12 @@ class OpenSearchDB(BaseVectorDB):
return result return result
def add( def add(
self, embeddings: List[str], documents: List[str], metadatas: List[object], ids: List[str], skip_embedding: bool self, embeddings: List[List[str]], documents: List[str], metadatas: List[object], ids: List[str],
): skip_embedding: bool):
"""add data in vector database """add data in vector database
:param embeddings: list of embeddings to add :param embeddings: list of embeddings to add
:type embeddings: List[str] :type embeddings: List[List[str]]
:param documents: list of texts to add :param documents: list of texts to add
:type documents: List[str] :type documents: List[str]
:param metadatas: list of metadata associated with docs :param metadatas: list of metadata associated with docs
@@ -127,8 +127,8 @@ class OpenSearchDB(BaseVectorDB):
""" """
docs = [] docs = []
# TODO(rupeshbansal, deshraj): Add support for skip embeddings here if already exists if not skip_embedding:
embeddings = self.embedder.embedding_fn(documents) embeddings = self.embedder.embedding_fn(documents)
for id, text, metadata, embeddings in zip(ids, documents, metadatas, embeddings): for id, text, metadata, embeddings in zip(ids, documents, metadatas, embeddings):
docs.append( docs.append(
{ {