@@ -1,5 +1,5 @@
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
from chromadb import Collection, QueryResult
|
||||
from langchain.docstore.document import Document
|
||||
@@ -76,7 +76,7 @@ class ChromaDB(BaseVectorDB):
|
||||
return self.client
|
||||
|
||||
@staticmethod
|
||||
def _generate_where_clause(where: Dict[str, any]) -> Dict[str, any]:
|
||||
def _generate_where_clause(where: dict[str, any]) -> dict[str, any]:
|
||||
# If only one filter is supplied, return it as is
|
||||
# (no need to wrap in $and based on chroma docs)
|
||||
if len(where.keys()) <= 1:
|
||||
@@ -105,18 +105,18 @@ class ChromaDB(BaseVectorDB):
|
||||
)
|
||||
return self.collection
|
||||
|
||||
def get(self, ids: Optional[List[str]] = None, where: Optional[Dict[str, any]] = None, limit: Optional[int] = None):
|
||||
def get(self, ids: Optional[list[str]] = None, where: Optional[dict[str, any]] = None, limit: Optional[int] = None):
|
||||
"""
|
||||
Get existing doc ids present in vector database
|
||||
|
||||
:param ids: list of doc ids to check for existence
|
||||
:type ids: List[str]
|
||||
:type ids: list[str]
|
||||
:param where: Optional. to filter data
|
||||
:type where: Dict[str, Any]
|
||||
:type where: dict[str, Any]
|
||||
:param limit: Optional. maximum number of documents
|
||||
:type limit: Optional[int]
|
||||
:return: Existing documents.
|
||||
:rtype: List[str]
|
||||
:rtype: list[str]
|
||||
"""
|
||||
args = {}
|
||||
if ids:
|
||||
@@ -129,23 +129,23 @@ class ChromaDB(BaseVectorDB):
|
||||
|
||||
def add(
|
||||
self,
|
||||
embeddings: List[List[float]],
|
||||
documents: List[str],
|
||||
metadatas: List[object],
|
||||
ids: List[str],
|
||||
**kwargs: Optional[Dict[str, Any]],
|
||||
embeddings: list[list[float]],
|
||||
documents: list[str],
|
||||
metadatas: list[object],
|
||||
ids: list[str],
|
||||
**kwargs: Optional[dict[str, Any]],
|
||||
) -> Any:
|
||||
"""
|
||||
Add vectors to chroma database
|
||||
|
||||
:param embeddings: list of embeddings to add
|
||||
:type embeddings: List[List[str]]
|
||||
:type embeddings: list[list[float]]
|
||||
:param documents: Documents
|
||||
:type documents: List[str]
|
||||
:type documents: list[str]
|
||||
:param metadatas: Metadatas
|
||||
:type metadatas: List[object]
|
||||
:type metadatas: list[object]
|
||||
:param ids: ids
|
||||
:type ids: List[str]
|
||||
:type ids: list[str]
|
||||
"""
|
||||
size = len(documents)
|
||||
if len(documents) != size or len(metadatas) != size or len(ids) != size:
|
||||
@@ -182,27 +182,27 @@ class ChromaDB(BaseVectorDB):
|
||||
|
||||
def query(
|
||||
self,
|
||||
input_query: List[str],
|
||||
input_query: list[str],
|
||||
n_results: int,
|
||||
where: Dict[str, any],
|
||||
where: dict[str, any],
|
||||
citations: bool = False,
|
||||
**kwargs: Optional[Dict[str, Any]],
|
||||
) -> Union[List[Tuple[str, Dict]], List[str]]:
|
||||
**kwargs: Optional[dict[str, Any]],
|
||||
) -> Union[list[tuple[str, dict]], list[str]]:
|
||||
"""
|
||||
Query contents from vector database based on vector similarity
|
||||
|
||||
:param input_query: list of query string
|
||||
:type input_query: List[str]
|
||||
:type input_query: list[str]
|
||||
:param n_results: no of similar documents to fetch from database
|
||||
:type n_results: int
|
||||
:param where: to filter data
|
||||
:type where: Dict[str, Any]
|
||||
:type where: dict[str, Any]
|
||||
:param citations: we use citations boolean param to return context along with the answer.
|
||||
:type citations: bool, default is False.
|
||||
:raises InvalidDimensionException: Dimensions do not match.
|
||||
:return: The content of the document that matched your query,
|
||||
along with url of the source and doc_id (if citations flag is true)
|
||||
:rtype: List[str], if citations=False, otherwise List[Tuple[str, str, str]]
|
||||
:rtype: list[str], if citations=False, otherwise list[tuple[str, str, str]]
|
||||
"""
|
||||
try:
|
||||
result = self.collection.query(
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
try:
|
||||
from elasticsearch import Elasticsearch
|
||||
@@ -84,14 +84,14 @@ class ElasticsearchDB(BaseVectorDB):
|
||||
def _get_or_create_collection(self, name):
|
||||
"""Note: nothing to return here. Discuss later"""
|
||||
|
||||
def get(self, ids: Optional[List[str]] = None, where: Optional[Dict[str, any]] = None, limit: Optional[int] = None):
|
||||
def get(self, ids: Optional[list[str]] = None, where: Optional[dict[str, any]] = None, limit: Optional[int] = None):
|
||||
"""
|
||||
Get existing doc ids present in vector database
|
||||
|
||||
:param ids: list of doc ids to check for existence
|
||||
:type ids: List[str]
|
||||
:type ids: list[str]
|
||||
:param where: to filter data
|
||||
:type where: Dict[str, any]
|
||||
:type where: dict[str, any]
|
||||
:return: ids
|
||||
:rtype: Set[str]
|
||||
"""
|
||||
@@ -110,22 +110,22 @@ class ElasticsearchDB(BaseVectorDB):
|
||||
|
||||
def add(
|
||||
self,
|
||||
embeddings: List[List[float]],
|
||||
documents: List[str],
|
||||
metadatas: List[object],
|
||||
ids: List[str],
|
||||
**kwargs: Optional[Dict[str, any]],
|
||||
embeddings: list[list[float]],
|
||||
documents: list[str],
|
||||
metadatas: list[object],
|
||||
ids: list[str],
|
||||
**kwargs: Optional[dict[str, any]],
|
||||
) -> Any:
|
||||
"""
|
||||
add data in vector database
|
||||
:param embeddings: list of embeddings to add
|
||||
:type embeddings: List[List[str]]
|
||||
:type embeddings: list[list[float]]
|
||||
:param documents: list of texts to add
|
||||
:type documents: List[str]
|
||||
:type documents: list[str]
|
||||
:param metadatas: list of metadata associated with docs
|
||||
:type metadatas: List[object]
|
||||
:type metadatas: list[object]
|
||||
:param ids: ids of docs
|
||||
:type ids: List[str]
|
||||
:type ids: list[str]
|
||||
"""
|
||||
|
||||
embeddings = self.embedder.embedding_fn(documents)
|
||||
@@ -154,27 +154,27 @@ class ElasticsearchDB(BaseVectorDB):
|
||||
|
||||
def query(
|
||||
self,
|
||||
input_query: List[str],
|
||||
input_query: list[str],
|
||||
n_results: int,
|
||||
where: Dict[str, any],
|
||||
where: dict[str, any],
|
||||
citations: bool = False,
|
||||
**kwargs: Optional[Dict[str, Any]],
|
||||
) -> Union[List[Tuple[str, Dict]], List[str]]:
|
||||
**kwargs: Optional[dict[str, Any]],
|
||||
) -> Union[list[tuple[str, dict]], list[str]]:
|
||||
"""
|
||||
query contents from vector database based on vector similarity
|
||||
|
||||
:param input_query: list of query string
|
||||
:type input_query: List[str]
|
||||
:type input_query: list[str]
|
||||
:param n_results: no of similar documents to fetch from database
|
||||
:type n_results: int
|
||||
:param where: Optional. to filter data
|
||||
:type where: Dict[str, any]
|
||||
:type where: dict[str, any]
|
||||
:return: The context of the document that matched your query, url of the source, doc_id
|
||||
:param citations: we use citations boolean param to return context along with the answer.
|
||||
:type citations: bool, default is False.
|
||||
:return: The content of the document that matched your query,
|
||||
along with url of the source and doc_id (if citations flag is true)
|
||||
:rtype: List[str], if citations=False, otherwise List[Tuple[str, str, str]]
|
||||
:rtype: list[str], if citations=False, otherwise list[tuple[str, str, str]]
|
||||
"""
|
||||
input_query_vector = self.embedder.embedding_fn(input_query)
|
||||
query_vector = input_query_vector[0]
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import logging
|
||||
import time
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple, Union
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
@@ -78,17 +78,17 @@ class OpenSearchDB(BaseVectorDB):
|
||||
"""Note: nothing to return here. Discuss later"""
|
||||
|
||||
def get(
|
||||
self, ids: Optional[List[str]] = None, where: Optional[Dict[str, any]] = None, limit: Optional[int] = None
|
||||
) -> Set[str]:
|
||||
self, ids: Optional[list[str]] = None, where: Optional[dict[str, any]] = None, limit: Optional[int] = None
|
||||
) -> set[str]:
|
||||
"""
|
||||
Get existing doc ids present in vector database
|
||||
|
||||
:param ids: list of doc ids to check for existence
|
||||
:type ids: List[str]
|
||||
:type ids: list[str]
|
||||
:param where: to filter data
|
||||
:type where: Dict[str, any]
|
||||
:type where: dict[str, any]
|
||||
:return: ids
|
||||
:type: Set[str]
|
||||
:rtype: set[str]
|
||||
"""
|
||||
query = {}
|
||||
if ids:
|
||||
@@ -116,19 +116,19 @@ class OpenSearchDB(BaseVectorDB):
|
||||
|
||||
def add(
|
||||
self,
|
||||
embeddings: List[List[str]],
|
||||
documents: List[str],
|
||||
metadatas: List[object],
|
||||
ids: List[str],
|
||||
**kwargs: Optional[Dict[str, any]],
|
||||
embeddings: list[list[str]],
|
||||
documents: list[str],
|
||||
metadatas: list[object],
|
||||
ids: list[str],
|
||||
**kwargs: Optional[dict[str, any]],
|
||||
):
|
||||
"""Add data in vector database.
|
||||
|
||||
Args:
|
||||
embeddings (List[List[str]]): List of embeddings to add.
|
||||
documents (List[str]): List of texts to add.
|
||||
metadatas (List[object]): List of metadata associated with docs.
|
||||
ids (List[str]): IDs of docs.
|
||||
embeddings (list[list[str]]): list of embeddings to add.
|
||||
documents (list[str]): list of texts to add.
|
||||
metadatas (list[object]): list of metadata associated with docs.
|
||||
ids (list[str]): IDs of docs.
|
||||
"""
|
||||
for batch_start in tqdm(range(0, len(documents), self.BATCH_SIZE), desc="Inserting batches in opensearch"):
|
||||
batch_end = batch_start + self.BATCH_SIZE
|
||||
@@ -156,26 +156,26 @@ class OpenSearchDB(BaseVectorDB):
|
||||
|
||||
def query(
|
||||
self,
|
||||
input_query: List[str],
|
||||
input_query: list[str],
|
||||
n_results: int,
|
||||
where: Dict[str, any],
|
||||
where: dict[str, any],
|
||||
citations: bool = False,
|
||||
**kwargs: Optional[Dict[str, Any]],
|
||||
) -> Union[List[Tuple[str, Dict]], List[str]]:
|
||||
**kwargs: Optional[dict[str, Any]],
|
||||
) -> Union[list[tuple[str, dict]], list[str]]:
|
||||
"""
|
||||
query contents from vector database based on vector similarity
|
||||
|
||||
:param input_query: list of query string
|
||||
:type input_query: List[str]
|
||||
:type input_query: list[str]
|
||||
:param n_results: no of similar documents to fetch from database
|
||||
:type n_results: int
|
||||
:param where: Optional. to filter data
|
||||
:type where: Dict[str, any]
|
||||
:type where: dict[str, any]
|
||||
:param citations: we use citations boolean param to return context along with the answer.
|
||||
:type citations: bool, default is False.
|
||||
:return: The content of the document that matched your query,
|
||||
along with url of the source and doc_id (if citations flag is true)
|
||||
:rtype: List[str], if citations=False, otherwise List[Tuple[str, str, str]]
|
||||
:rtype: list[str], if citations=False, otherwise list[tuple[str, str, str]]
|
||||
"""
|
||||
embeddings = OpenAIEmbeddings()
|
||||
docsearch = OpenSearchVectorSearch(
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import os
|
||||
from typing import Dict, List, Optional, Tuple, Union
|
||||
from typing import Optional, Union
|
||||
|
||||
try:
|
||||
import pinecone
|
||||
@@ -67,14 +67,14 @@ class PineconeDB(BaseVectorDB):
|
||||
)
|
||||
return pinecone.Index(self.index_name)
|
||||
|
||||
def get(self, ids: Optional[List[str]] = None, where: Optional[Dict[str, any]] = None, limit: Optional[int] = None):
|
||||
def get(self, ids: Optional[list[str]] = None, where: Optional[dict[str, any]] = None, limit: Optional[int] = None):
|
||||
"""
|
||||
Get existing doc ids present in vector database
|
||||
|
||||
:param ids: list of doc ids to check for existence
|
||||
:type ids: List[str]
|
||||
:type ids: list[str]
|
||||
:param where: to filter data
|
||||
:type where: Dict[str, any]
|
||||
:type where: dict[str, any]
|
||||
:return: ids
|
||||
:rtype: Set[str]
|
||||
"""
|
||||
@@ -88,20 +88,20 @@ class PineconeDB(BaseVectorDB):
|
||||
|
||||
def add(
|
||||
self,
|
||||
embeddings: List[List[float]],
|
||||
documents: List[str],
|
||||
metadatas: List[object],
|
||||
ids: List[str],
|
||||
**kwargs: Optional[Dict[str, any]],
|
||||
embeddings: list[list[float]],
|
||||
documents: list[str],
|
||||
metadatas: list[object],
|
||||
ids: list[str],
|
||||
**kwargs: Optional[dict[str, any]],
|
||||
):
|
||||
"""add data in vector database
|
||||
|
||||
:param documents: list of texts to add
|
||||
:type documents: List[str]
|
||||
:type documents: list[str]
|
||||
:param metadatas: list of metadata associated with docs
|
||||
:type metadatas: List[object]
|
||||
:type metadatas: list[object]
|
||||
:param ids: ids of docs
|
||||
:type ids: List[str]
|
||||
:type ids: list[str]
|
||||
"""
|
||||
docs = []
|
||||
print("Adding documents to Pinecone...")
|
||||
@@ -120,25 +120,25 @@ class PineconeDB(BaseVectorDB):
|
||||
|
||||
def query(
|
||||
self,
|
||||
input_query: List[str],
|
||||
input_query: list[str],
|
||||
n_results: int,
|
||||
where: Dict[str, any],
|
||||
where: dict[str, any],
|
||||
citations: bool = False,
|
||||
**kwargs: Optional[Dict[str, any]],
|
||||
) -> Union[List[Tuple[str, Dict]], List[str]]:
|
||||
**kwargs: Optional[dict[str, any]],
|
||||
) -> Union[list[tuple[str, dict]], list[str]]:
|
||||
"""
|
||||
query contents from vector database based on vector similarity
|
||||
:param input_query: list of query string
|
||||
:type input_query: List[str]
|
||||
:type input_query: list[str]
|
||||
:param n_results: no of similar documents to fetch from database
|
||||
:type n_results: int
|
||||
:param where: Optional. to filter data
|
||||
:type where: Dict[str, any]
|
||||
:type where: dict[str, any]
|
||||
:param citations: we use citations boolean param to return context along with the answer.
|
||||
:type citations: bool, default is False.
|
||||
:return: The content of the document that matched your query,
|
||||
along with url of the source and doc_id (if citations flag is true)
|
||||
:rtype: List[str], if citations=False, otherwise List[Tuple[str, str, str]]
|
||||
:rtype: list[str], if citations=False, otherwise list[tuple[str, str, str]]
|
||||
"""
|
||||
query_vector = self.embedder.embedding_fn([input_query])[0]
|
||||
data = self.client.query(vector=query_vector, filter=where, top_k=n_results, include_metadata=True, **kwargs)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import copy
|
||||
import os
|
||||
import uuid
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
try:
|
||||
from qdrant_client import QdrantClient
|
||||
@@ -69,14 +69,14 @@ class QdrantDB(BaseVectorDB):
|
||||
def _get_or_create_collection(self):
|
||||
return f"{self.config.collection_name}-{self.embedder.vector_dimension}".lower().replace("_", "-")
|
||||
|
||||
def get(self, ids: Optional[List[str]] = None, where: Optional[Dict[str, any]] = None, limit: Optional[int] = None):
|
||||
def get(self, ids: Optional[list[str]] = None, where: Optional[dict[str, any]] = None, limit: Optional[int] = None):
|
||||
"""
|
||||
Get existing doc ids present in vector database
|
||||
|
||||
:param ids: list of doc ids to check for existence
|
||||
:type ids: List[str]
|
||||
:type ids: list[str]
|
||||
:param where: to filter data
|
||||
:type where: Dict[str, any]
|
||||
:type where: dict[str, any]
|
||||
:param limit: The number of entries to be fetched
|
||||
:type limit: Optional int, defaults to None
|
||||
:return: All the existing IDs
|
||||
@@ -122,21 +122,21 @@ class QdrantDB(BaseVectorDB):
|
||||
|
||||
def add(
|
||||
self,
|
||||
embeddings: List[List[float]],
|
||||
documents: List[str],
|
||||
metadatas: List[object],
|
||||
ids: List[str],
|
||||
**kwargs: Optional[Dict[str, any]],
|
||||
embeddings: list[list[float]],
|
||||
documents: list[str],
|
||||
metadatas: list[object],
|
||||
ids: list[str],
|
||||
**kwargs: Optional[dict[str, any]],
|
||||
):
|
||||
"""add data in vector database
|
||||
:param embeddings: list of embeddings for the corresponding documents to be added
|
||||
:type documents: List[List[float]]
|
||||
:type embeddings: list[list[float]]
|
||||
:param documents: list of texts to add
|
||||
:type documents: List[str]
|
||||
:type documents: list[str]
|
||||
:param metadatas: list of metadata associated with docs
|
||||
:type metadatas: List[object]
|
||||
:type metadatas: list[object]
|
||||
:param ids: ids of docs
|
||||
:type ids: List[str]
|
||||
:type ids: list[str]
|
||||
"""
|
||||
embeddings = self.embedder.embedding_fn(documents)
|
||||
|
||||
@@ -159,25 +159,25 @@ class QdrantDB(BaseVectorDB):
|
||||
|
||||
def query(
|
||||
self,
|
||||
input_query: List[str],
|
||||
input_query: list[str],
|
||||
n_results: int,
|
||||
where: Dict[str, any],
|
||||
where: dict[str, any],
|
||||
citations: bool = False,
|
||||
**kwargs: Optional[Dict[str, Any]],
|
||||
) -> Union[List[Tuple[str, Dict]], List[str]]:
|
||||
**kwargs: Optional[dict[str, Any]],
|
||||
) -> Union[list[tuple[str, dict]], list[str]]:
|
||||
"""
|
||||
query contents from vector database based on vector similarity
|
||||
:param input_query: list of query string
|
||||
:type input_query: List[str]
|
||||
:type input_query: list[str]
|
||||
:param n_results: no of similar documents to fetch from database
|
||||
:type n_results: int
|
||||
:param where: Optional. to filter data
|
||||
:type where: Dict[str, any]
|
||||
:type where: dict[str, any]
|
||||
:param citations: we use citations boolean param to return context along with the answer.
|
||||
:type citations: bool, default is False.
|
||||
:return: The content of the document that matched your query,
|
||||
along with url of the source and doc_id (if citations flag is true)
|
||||
:rtype: List[str], if citations=False, otherwise List[Tuple[str, str, str]]
|
||||
:rtype: list[str], if citations=False, otherwise list[tuple[str, str, str]]
|
||||
"""
|
||||
query_vector = self.embedder.embedding_fn([input_query])[0]
|
||||
keys = set(where.keys() if where is not None else set())
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import copy
|
||||
import os
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
try:
|
||||
import weaviate
|
||||
@@ -117,13 +117,13 @@ class WeaviateDB(BaseVectorDB):
|
||||
|
||||
self.client.schema.create(class_obj)
|
||||
|
||||
def get(self, ids: Optional[List[str]] = None, where: Optional[Dict[str, any]] = None, limit: Optional[int] = None):
|
||||
def get(self, ids: Optional[list[str]] = None, where: Optional[dict[str, any]] = None, limit: Optional[int] = None):
|
||||
"""
|
||||
Get existing doc ids present in vector database
|
||||
:param ids: list of doc ids to check for existence
|
||||
:type ids: List[str]
|
||||
:type ids: list[str]
|
||||
:param where: to filter data
|
||||
:type where: Dict[str, any]
|
||||
:type where: dict[str, any]
|
||||
:return: ids
|
||||
:rtype: Set[str]
|
||||
"""
|
||||
@@ -153,21 +153,21 @@ class WeaviateDB(BaseVectorDB):
|
||||
|
||||
def add(
|
||||
self,
|
||||
embeddings: List[List[float]],
|
||||
documents: List[str],
|
||||
metadatas: List[object],
|
||||
ids: List[str],
|
||||
**kwargs: Optional[Dict[str, any]],
|
||||
embeddings: list[list[float]],
|
||||
documents: list[str],
|
||||
metadatas: list[object],
|
||||
ids: list[str],
|
||||
**kwargs: Optional[dict[str, any]],
|
||||
):
|
||||
"""add data in vector database
|
||||
:param embeddings: list of embeddings for the corresponding documents to be added
|
||||
:type documents: List[List[float]]
|
||||
:type embeddings: list[list[float]]
|
||||
:param documents: list of texts to add
|
||||
:type documents: List[str]
|
||||
:type documents: list[str]
|
||||
:param metadatas: list of metadata associated with docs
|
||||
:type metadatas: List[object]
|
||||
:type metadatas: list[object]
|
||||
:param ids: ids of docs
|
||||
:type ids: List[str]
|
||||
:type ids: list[str]
|
||||
"""
|
||||
embeddings = self.embedder.embedding_fn(documents)
|
||||
self.client.batch.configure(batch_size=self.BATCH_SIZE, timeout_retries=3) # Configure batch
|
||||
@@ -192,25 +192,25 @@ class WeaviateDB(BaseVectorDB):
|
||||
|
||||
def query(
|
||||
self,
|
||||
input_query: List[str],
|
||||
input_query: list[str],
|
||||
n_results: int,
|
||||
where: Dict[str, any],
|
||||
where: dict[str, any],
|
||||
citations: bool = False,
|
||||
**kwargs: Optional[Dict[str, Any]],
|
||||
) -> Union[List[Tuple[str, Dict]], List[str]]:
|
||||
**kwargs: Optional[dict[str, Any]],
|
||||
) -> Union[list[tuple[str, dict]], list[str]]:
|
||||
"""
|
||||
query contents from vector database based on vector similarity
|
||||
:param input_query: list of query string
|
||||
:type input_query: List[str]
|
||||
:type input_query: list[str]
|
||||
:param n_results: no of similar documents to fetch from database
|
||||
:type n_results: int
|
||||
:param where: Optional. to filter data
|
||||
:type where: Dict[str, any]
|
||||
:type where: dict[str, any]
|
||||
:param citations: we use citations boolean param to return context along with the answer.
|
||||
:type citations: bool, default is False.
|
||||
:return: The content of the document that matched your query,
|
||||
along with url of the source and doc_id (if citations flag is true)
|
||||
:rtype: List[str], if citations=False, otherwise List[Tuple[str, str, str]]
|
||||
:rtype: list[str], if citations=False, otherwise list[tuple[str, str, str]]
|
||||
"""
|
||||
query_vector = self.embedder.embedding_fn([input_query])[0]
|
||||
keys = set(where.keys() if where is not None else set())
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
from embedchain.config import ZillizDBConfig
|
||||
from embedchain.helpers.json_serializable import register_deserializable
|
||||
@@ -88,14 +88,14 @@ class ZillizVectorDB(BaseVectorDB):
|
||||
self.collection.create_index("embeddings", index)
|
||||
return self.collection
|
||||
|
||||
def get(self, ids: Optional[List[str]] = None, where: Optional[Dict[str, any]] = None, limit: Optional[int] = None):
|
||||
def get(self, ids: Optional[list[str]] = None, where: Optional[dict[str, any]] = None, limit: Optional[int] = None):
|
||||
"""
|
||||
Get existing doc ids present in vector database
|
||||
|
||||
:param ids: list of doc ids to check for existence
|
||||
:type ids: List[str]
|
||||
:type ids: list[str]
|
||||
:param where: Optional. to filter data
|
||||
:type where: Dict[str, Any]
|
||||
:type where: dict[str, Any]
|
||||
:param limit: Optional. maximum number of documents
|
||||
:type limit: Optional[int]
|
||||
:return: Existing documents.
|
||||
@@ -115,11 +115,11 @@ class ZillizVectorDB(BaseVectorDB):
|
||||
|
||||
def add(
|
||||
self,
|
||||
embeddings: List[List[float]],
|
||||
documents: List[str],
|
||||
metadatas: List[object],
|
||||
ids: List[str],
|
||||
**kwargs: Optional[Dict[str, any]],
|
||||
embeddings: list[list[float]],
|
||||
documents: list[str],
|
||||
metadatas: list[object],
|
||||
ids: list[str],
|
||||
**kwargs: Optional[dict[str, any]],
|
||||
):
|
||||
"""Add to database"""
|
||||
embeddings = self.embedder.embedding_fn(documents)
|
||||
@@ -134,17 +134,17 @@ class ZillizVectorDB(BaseVectorDB):
|
||||
|
||||
def query(
|
||||
self,
|
||||
input_query: List[str],
|
||||
input_query: list[str],
|
||||
n_results: int,
|
||||
where: Dict[str, any],
|
||||
where: dict[str, any],
|
||||
citations: bool = False,
|
||||
**kwargs: Optional[Dict[str, Any]],
|
||||
) -> Union[List[Tuple[str, Dict]], List[str]]:
|
||||
**kwargs: Optional[dict[str, Any]],
|
||||
) -> Union[list[tuple[str, dict]], list[str]]:
|
||||
"""
|
||||
Query contents from vector database based on vector similarity
|
||||
|
||||
:param input_query: list of query string
|
||||
:type input_query: List[str]
|
||||
:type input_query: list[str]
|
||||
:param n_results: no of similar documents to fetch from database
|
||||
:type n_results: int
|
||||
:param where: to filter data
|
||||
@@ -154,7 +154,7 @@ class ZillizVectorDB(BaseVectorDB):
|
||||
:type citations: bool, default is False.
|
||||
:return: The content of the document that matched your query,
|
||||
along with url of the source and doc_id (if citations flag is true)
|
||||
:rtype: List[str], if citations=False, otherwise List[Tuple[str, str, str]]
|
||||
:rtype: list[str], if citations=False, otherwise list[tuple[str, str, str]]
|
||||
"""
|
||||
|
||||
if self.collection.is_empty:
|
||||
@@ -200,7 +200,7 @@ class ZillizVectorDB(BaseVectorDB):
|
||||
"""
|
||||
return self.collection.num_entities
|
||||
|
||||
def reset(self, collection_names: List[str] = None):
|
||||
def reset(self, collection_names: list[str] = None):
|
||||
"""
|
||||
Resets the database. Deletes all embeddings irreversibly.
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user