#1128 | Remove deprecated type hints from typing module (#1131)

2024-01-09 18:35:24 +01:00
parent c9df7a2020
commit 0de9491c61
41 changed files with 272 additions and 267 deletions
--- a/embedchain/vectordb/chroma.py
+++ b/embedchain/vectordb/chroma.py
@@ -1,5 +1,5 @@
 import logging
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Optional, Union

 from chromadb import Collection, QueryResult
 from langchain.docstore.document import Document
@@ -76,7 +76,7 @@ class ChromaDB(BaseVectorDB):
        return self.client

    @staticmethod
-    def _generate_where_clause(where: Dict[str, any]) -> Dict[str, any]:
+    def _generate_where_clause(where: dict[str, any]) -> dict[str, any]:
        # If only one filter is supplied, return it as is
        # (no need to wrap in $and based on chroma docs)
        if len(where.keys()) <= 1:
@@ -105,18 +105,18 @@ class ChromaDB(BaseVectorDB):
        )
        return self.collection

-    def get(self, ids: Optional[List[str]] = None, where: Optional[Dict[str, any]] = None, limit: Optional[int] = None):
+    def get(self, ids: Optional[list[str]] = None, where: Optional[dict[str, any]] = None, limit: Optional[int] = None):
        """
        Get existing doc ids present in vector database

        :param ids: list of doc ids to check for existence
-        :type ids: List[str]
+        :type ids: list[str]
        :param where: Optional. to filter data
-        :type where: Dict[str, Any]
+        :type where: dict[str, Any]
        :param limit: Optional. maximum number of documents
        :type limit: Optional[int]
        :return: Existing documents.
-        :rtype: List[str]
+        :rtype: list[str]
        """
        args = {}
        if ids:
@@ -129,23 +129,23 @@ class ChromaDB(BaseVectorDB):

    def add(
        self,
-        embeddings: List[List[float]],
-        documents: List[str],
-        metadatas: List[object],
-        ids: List[str],
-        **kwargs: Optional[Dict[str, Any]],
+        embeddings: list[list[float]],
+        documents: list[str],
+        metadatas: list[object],
+        ids: list[str],
+        **kwargs: Optional[dict[str, Any]],
    ) -> Any:
        """
        Add vectors to chroma database

        :param embeddings: list of embeddings to add
-        :type embeddings: List[List[str]]
+        :type embeddings: list[list[float]]
        :param documents: Documents
-        :type documents: List[str]
+        :type documents: list[str]
        :param metadatas: Metadatas
-        :type metadatas: List[object]
+        :type metadatas: list[object]
        :param ids: ids
-        :type ids: List[str]
+        :type ids: list[str]
        """
        size = len(documents)
        if len(documents) != size or len(metadatas) != size or len(ids) != size:
@@ -182,27 +182,27 @@ class ChromaDB(BaseVectorDB):

    def query(
        self,
-        input_query: List[str],
+        input_query: list[str],
        n_results: int,
-        where: Dict[str, any],
+        where: dict[str, any],
        citations: bool = False,
-        **kwargs: Optional[Dict[str, Any]],
-    ) -> Union[List[Tuple[str, Dict]], List[str]]:
+        **kwargs: Optional[dict[str, Any]],
+    ) -> Union[list[tuple[str, dict]], list[str]]:
        """
        Query contents from vector database based on vector similarity

        :param input_query: list of query string
-        :type input_query: List[str]
+        :type input_query: list[str]
        :param n_results: no of similar documents to fetch from database
        :type n_results: int
        :param where: to filter data
-        :type where: Dict[str, Any]
+        :type where: dict[str, Any]
        :param citations: we use citations boolean param to return context along with the answer.
        :type citations: bool, default is False.
        :raises InvalidDimensionException: Dimensions do not match.
        :return: The content of the document that matched your query,
        along with url of the source and doc_id (if citations flag is true)
-        :rtype: List[str], if citations=False, otherwise List[Tuple[str, str, str]]
+        :rtype: list[str], if citations=False, otherwise list[tuple[str, str, str]]
        """
        try:
            result = self.collection.query(
--- a/embedchain/vectordb/elasticsearch.py
+++ b/embedchain/vectordb/elasticsearch.py
@@ -1,5 +1,5 @@
 import logging
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Optional, Union

 try:
    from elasticsearch import Elasticsearch
@@ -84,14 +84,14 @@ class ElasticsearchDB(BaseVectorDB):
    def _get_or_create_collection(self, name):
        """Note: nothing to return here. Discuss later"""

-    def get(self, ids: Optional[List[str]] = None, where: Optional[Dict[str, any]] = None, limit: Optional[int] = None):
+    def get(self, ids: Optional[list[str]] = None, where: Optional[dict[str, any]] = None, limit: Optional[int] = None):
        """
        Get existing doc ids present in vector database

        :param ids: _list of doc ids to check for existence
-        :type ids: List[str]
+        :type ids: list[str]
        :param where: to filter data
-        :type where: Dict[str, any]
+        :type where: dict[str, any]
        :return: ids
        :rtype: Set[str]
        """
@@ -110,22 +110,22 @@ class ElasticsearchDB(BaseVectorDB):

    def add(
        self,
-        embeddings: List[List[float]],
-        documents: List[str],
-        metadatas: List[object],
-        ids: List[str],
-        **kwargs: Optional[Dict[str, any]],
+        embeddings: list[list[float]],
+        documents: list[str],
+        metadatas: list[object],
+        ids: list[str],
+        **kwargs: Optional[dict[str, any]],
    ) -> Any:
        """
        add data in vector database
        :param embeddings: list of embeddings to add
-        :type embeddings: List[List[str]]
+        :type embeddings: list[list[float]]
        :param documents: list of texts to add
-        :type documents: List[str]
+        :type documents: list[str]
        :param metadatas: list of metadata associated with docs
-        :type metadatas: List[object]
+        :type metadatas: list[object]
        :param ids: ids of docs
-        :type ids: List[str]
+        :type ids: list[str]
        """

        embeddings = self.embedder.embedding_fn(documents)
@@ -154,27 +154,27 @@ class ElasticsearchDB(BaseVectorDB):

    def query(
        self,
-        input_query: List[str],
+        input_query: list[str],
        n_results: int,
-        where: Dict[str, any],
+        where: dict[str, any],
        citations: bool = False,
-        **kwargs: Optional[Dict[str, Any]],
-    ) -> Union[List[Tuple[str, Dict]], List[str]]:
+        **kwargs: Optional[dict[str, Any]],
+    ) -> Union[list[tuple[str, dict]], list[str]]:
        """
        query contents from vector database based on vector similarity

        :param input_query: list of query string
-        :type input_query: List[str]
+        :type input_query: list[str]
        :param n_results: no of similar documents to fetch from database
        :type n_results: int
        :param where: Optional. to filter data
-        :type where: Dict[str, any]
+        :type where: dict[str, any]
        :return: The context of the document that matched your query, url of the source, doc_id
        :param citations: we use citations boolean param to return context along with the answer.
        :type citations: bool, default is False.
        :return: The content of the document that matched your query,
        along with url of the source and doc_id (if citations flag is true)
-        :rtype: List[str], if citations=False, otherwise List[Tuple[str, str, str]]
+        :rtype: list[str], if citations=False, otherwise list[tuple[str, str, str]]
        """
        input_query_vector = self.embedder.embedding_fn(input_query)
        query_vector = input_query_vector[0]
--- a/embedchain/vectordb/opensearch.py
+++ b/embedchain/vectordb/opensearch.py
@@ -1,6 +1,6 @@
 import logging
 import time
-from typing import Any, Dict, List, Optional, Set, Tuple, Union
+from typing import Any, Optional, Union

 from tqdm import tqdm

@@ -78,17 +78,17 @@ class OpenSearchDB(BaseVectorDB):
        """Note: nothing to return here. Discuss later"""

    def get(
-        self, ids: Optional[List[str]] = None, where: Optional[Dict[str, any]] = None, limit: Optional[int] = None
-    ) -> Set[str]:
+        self, ids: Optional[list[str]] = None, where: Optional[dict[str, any]] = None, limit: Optional[int] = None
+    ) -> set[str]:
        """
        Get existing doc ids present in vector database

        :param ids: _list of doc ids to check for existence
-        :type ids: List[str]
+        :type ids: list[str]
        :param where: to filter data
-        :type where: Dict[str, any]
+        :type where: dict[str, any]
        :return: ids
-        :type: Set[str]
+        :rtype: set[str]
        """
        query = {}
        if ids:
@@ -116,19 +116,19 @@ class OpenSearchDB(BaseVectorDB):

    def add(
        self,
-        embeddings: List[List[str]],
-        documents: List[str],
-        metadatas: List[object],
-        ids: List[str],
-        **kwargs: Optional[Dict[str, any]],
+        embeddings: list[list[str]],
+        documents: list[str],
+        metadatas: list[object],
+        ids: list[str],
+        **kwargs: Optional[dict[str, any]],
    ):
        """Add data in vector database.

        Args:
-            embeddings (List[List[str]]): List of embeddings to add.
-            documents (List[str]): List of texts to add.
-            metadatas (List[object]): List of metadata associated with docs.
-            ids (List[str]): IDs of docs.
+            embeddings (list[list[str]]): list of embeddings to add.
+            documents (list[str]): list of texts to add.
+            metadatas (list[object]): list of metadata associated with docs.
+            ids (list[str]): IDs of docs.
        """
        for batch_start in tqdm(range(0, len(documents), self.BATCH_SIZE), desc="Inserting batches in opensearch"):
            batch_end = batch_start + self.BATCH_SIZE
@@ -156,26 +156,26 @@ class OpenSearchDB(BaseVectorDB):

    def query(
        self,
-        input_query: List[str],
+        input_query: list[str],
        n_results: int,
-        where: Dict[str, any],
+        where: dict[str, any],
        citations: bool = False,
-        **kwargs: Optional[Dict[str, Any]],
-    ) -> Union[List[Tuple[str, Dict]], List[str]]:
+        **kwargs: Optional[dict[str, Any]],
+    ) -> Union[list[tuple[str, dict]], list[str]]:
        """
        query contents from vector database based on vector similarity

        :param input_query: list of query string
-        :type input_query: List[str]
+        :type input_query: list[str]
        :param n_results: no of similar documents to fetch from database
        :type n_results: int
        :param where: Optional. to filter data
-        :type where: Dict[str, any]
+        :type where: dict[str, any]
        :param citations: we use citations boolean param to return context along with the answer.
        :type citations: bool, default is False.
        :return: The content of the document that matched your query,
        along with url of the source and doc_id (if citations flag is true)
-        :rtype: List[str], if citations=False, otherwise List[Tuple[str, str, str]]
+        :rtype: list[str], if citations=False, otherwise list[tuple[str, str, str]]
        """
        embeddings = OpenAIEmbeddings()
        docsearch = OpenSearchVectorSearch(
--- a/embedchain/vectordb/pinecone.py
+++ b/embedchain/vectordb/pinecone.py
@@ -1,5 +1,5 @@
 import os
-from typing import Dict, List, Optional, Tuple, Union
+from typing import Optional, Union

 try:
    import pinecone
@@ -67,14 +67,14 @@ class PineconeDB(BaseVectorDB):
            )
        return pinecone.Index(self.index_name)

-    def get(self, ids: Optional[List[str]] = None, where: Optional[Dict[str, any]] = None, limit: Optional[int] = None):
+    def get(self, ids: Optional[list[str]] = None, where: Optional[dict[str, any]] = None, limit: Optional[int] = None):
        """
        Get existing doc ids present in vector database

        :param ids: _list of doc ids to check for existence
-        :type ids: List[str]
+        :type ids: list[str]
        :param where: to filter data
-        :type where: Dict[str, any]
+        :type where: dict[str, any]
        :return: ids
        :rtype: Set[str]
        """
@@ -88,20 +88,20 @@ class PineconeDB(BaseVectorDB):

    def add(
        self,
-        embeddings: List[List[float]],
-        documents: List[str],
-        metadatas: List[object],
-        ids: List[str],
-        **kwargs: Optional[Dict[str, any]],
+        embeddings: list[list[float]],
+        documents: list[str],
+        metadatas: list[object],
+        ids: list[str],
+        **kwargs: Optional[dict[str, any]],
    ):
        """add data in vector database

        :param documents: list of texts to add
-        :type documents: List[str]
+        :type documents: list[str]
        :param metadatas: list of metadata associated with docs
-        :type metadatas: List[object]
+        :type metadatas: list[object]
        :param ids: ids of docs
-        :type ids: List[str]
+        :type ids: list[str]
        """
        docs = []
        print("Adding documents to Pinecone...")
@@ -120,25 +120,25 @@ class PineconeDB(BaseVectorDB):

    def query(
        self,
-        input_query: List[str],
+        input_query: list[str],
        n_results: int,
-        where: Dict[str, any],
+        where: dict[str, any],
        citations: bool = False,
-        **kwargs: Optional[Dict[str, any]],
-    ) -> Union[List[Tuple[str, Dict]], List[str]]:
+        **kwargs: Optional[dict[str, any]],
+    ) -> Union[list[tuple[str, dict]], list[str]]:
        """
        query contents from vector database based on vector similarity
        :param input_query: list of query string
-        :type input_query: List[str]
+        :type input_query: list[str]
        :param n_results: no of similar documents to fetch from database
        :type n_results: int
        :param where: Optional. to filter data
-        :type where: Dict[str, any]
+        :type where: dict[str, any]
        :param citations: we use citations boolean param to return context along with the answer.
        :type citations: bool, default is False.
        :return: The content of the document that matched your query,
        along with url of the source and doc_id (if citations flag is true)
-        :rtype: List[str], if citations=False, otherwise List[Tuple[str, str, str]]
+        :rtype: list[str], if citations=False, otherwise list[tuple[str, str, str]]
        """
        query_vector = self.embedder.embedding_fn([input_query])[0]
        data = self.client.query(vector=query_vector, filter=where, top_k=n_results, include_metadata=True, **kwargs)
--- a/embedchain/vectordb/qdrant.py
+++ b/embedchain/vectordb/qdrant.py
@@ -1,7 +1,7 @@
 import copy
 import os
 import uuid
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Optional, Union

 try:
    from qdrant_client import QdrantClient
@@ -69,14 +69,14 @@ class QdrantDB(BaseVectorDB):
    def _get_or_create_collection(self):
        return f"{self.config.collection_name}-{self.embedder.vector_dimension}".lower().replace("_", "-")

-    def get(self, ids: Optional[List[str]] = None, where: Optional[Dict[str, any]] = None, limit: Optional[int] = None):
+    def get(self, ids: Optional[list[str]] = None, where: Optional[dict[str, any]] = None, limit: Optional[int] = None):
        """
        Get existing doc ids present in vector database

        :param ids: _list of doc ids to check for existence
-        :type ids: List[str]
+        :type ids: list[str]
        :param where: to filter data
-        :type where: Dict[str, any]
+        :type where: dict[str, any]
        :param limit: The number of entries to be fetched
        :type limit: Optional int, defaults to None
        :return: All the existing IDs
@@ -122,21 +122,21 @@ class QdrantDB(BaseVectorDB):

    def add(
        self,
-        embeddings: List[List[float]],
-        documents: List[str],
-        metadatas: List[object],
-        ids: List[str],
-        **kwargs: Optional[Dict[str, any]],
+        embeddings: list[list[float]],
+        documents: list[str],
+        metadatas: list[object],
+        ids: list[str],
+        **kwargs: Optional[dict[str, any]],
    ):
        """add data in vector database
        :param embeddings: list of embeddings for the corresponding documents to be added
-        :type documents: List[List[float]]
+        :type embeddings: list[list[float]]
        :param documents: list of texts to add
-        :type documents: List[str]
+        :type documents: list[str]
        :param metadatas: list of metadata associated with docs
-        :type metadatas: List[object]
+        :type metadatas: list[object]
        :param ids: ids of docs
-        :type ids: List[str]
+        :type ids: list[str]
        """
        embeddings = self.embedder.embedding_fn(documents)

@@ -159,25 +159,25 @@ class QdrantDB(BaseVectorDB):

    def query(
        self,
-        input_query: List[str],
+        input_query: list[str],
        n_results: int,
-        where: Dict[str, any],
+        where: dict[str, any],
        citations: bool = False,
-        **kwargs: Optional[Dict[str, Any]],
-    ) -> Union[List[Tuple[str, Dict]], List[str]]:
+        **kwargs: Optional[dict[str, Any]],
+    ) -> Union[list[tuple[str, dict]], list[str]]:
        """
        query contents from vector database based on vector similarity
        :param input_query: list of query string
-        :type input_query: List[str]
+        :type input_query: list[str]
        :param n_results: no of similar documents to fetch from database
        :type n_results: int
        :param where: Optional. to filter data
-        :type where: Dict[str, any]
+        :type where: dict[str, any]
        :param citations: we use citations boolean param to return context along with the answer.
        :type citations: bool, default is False.
        :return: The content of the document that matched your query,
        along with url of the source and doc_id (if citations flag is true)
-        :rtype: List[str], if citations=False, otherwise List[Tuple[str, str, str]]
+        :rtype: list[str], if citations=False, otherwise list[tuple[str, str, str]]
        """
        query_vector = self.embedder.embedding_fn([input_query])[0]
        keys = set(where.keys() if where is not None else set())
--- a/embedchain/vectordb/weaviate.py
+++ b/embedchain/vectordb/weaviate.py
@@ -1,6 +1,6 @@
 import copy
 import os
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Optional, Union

 try:
    import weaviate
@@ -117,13 +117,13 @@ class WeaviateDB(BaseVectorDB):

            self.client.schema.create(class_obj)

-    def get(self, ids: Optional[List[str]] = None, where: Optional[Dict[str, any]] = None, limit: Optional[int] = None):
+    def get(self, ids: Optional[list[str]] = None, where: Optional[dict[str, any]] = None, limit: Optional[int] = None):
        """
        Get existing doc ids present in vector database
        :param ids: _list of doc ids to check for existance
-        :type ids: List[str]
+        :type ids: list[str]
        :param where: to filter data
-        :type where: Dict[str, any]
+        :type where: dict[str, any]
        :return: ids
        :rtype: Set[str]
        """
@@ -153,21 +153,21 @@ class WeaviateDB(BaseVectorDB):

    def add(
        self,
-        embeddings: List[List[float]],
-        documents: List[str],
-        metadatas: List[object],
-        ids: List[str],
-        **kwargs: Optional[Dict[str, any]],
+        embeddings: list[list[float]],
+        documents: list[str],
+        metadatas: list[object],
+        ids: list[str],
+        **kwargs: Optional[dict[str, any]],
    ):
        """add data in vector database
        :param embeddings: list of embeddings for the corresponding documents to be added
-        :type documents: List[List[float]]
+        :type embeddings: list[list[float]]
        :param documents: list of texts to add
-        :type documents: List[str]
+        :type documents: list[str]
        :param metadatas: list of metadata associated with docs
-        :type metadatas: List[object]
+        :type metadatas: list[object]
        :param ids: ids of docs
-        :type ids: List[str]
+        :type ids: list[str]
        """
        embeddings = self.embedder.embedding_fn(documents)
        self.client.batch.configure(batch_size=self.BATCH_SIZE, timeout_retries=3)  # Configure batch
@@ -192,25 +192,25 @@ class WeaviateDB(BaseVectorDB):

    def query(
        self,
-        input_query: List[str],
+        input_query: list[str],
        n_results: int,
-        where: Dict[str, any],
+        where: dict[str, any],
        citations: bool = False,
-        **kwargs: Optional[Dict[str, Any]],
-    ) -> Union[List[Tuple[str, Dict]], List[str]]:
+        **kwargs: Optional[dict[str, Any]],
+    ) -> Union[list[tuple[str, dict]], list[str]]:
        """
        query contents from vector database based on vector similarity
        :param input_query: list of query string
-        :type input_query: List[str]
+        :type input_query: list[str]
        :param n_results: no of similar documents to fetch from database
        :type n_results: int
        :param where: Optional. to filter data
-        :type where: Dict[str, any]
+        :type where: dict[str, any]
        :param citations: we use citations boolean param to return context along with the answer.
        :type citations: bool, default is False.
        :return: The content of the document that matched your query,
        along with url of the source and doc_id (if citations flag is true)
-        :rtype: List[str], if citations=False, otherwise List[Tuple[str, str, str]]
+        :rtype: list[str], if citations=False, otherwise list[tuple[str, str, str]]
        """
        query_vector = self.embedder.embedding_fn([input_query])[0]
        keys = set(where.keys() if where is not None else set())
--- a/embedchain/vectordb/zilliz.py
+++ b/embedchain/vectordb/zilliz.py
@@ -1,5 +1,5 @@
 import logging
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Optional, Union

 from embedchain.config import ZillizDBConfig
 from embedchain.helpers.json_serializable import register_deserializable
@@ -88,14 +88,14 @@ class ZillizVectorDB(BaseVectorDB):
            self.collection.create_index("embeddings", index)
        return self.collection

-    def get(self, ids: Optional[List[str]] = None, where: Optional[Dict[str, any]] = None, limit: Optional[int] = None):
+    def get(self, ids: Optional[list[str]] = None, where: Optional[dict[str, any]] = None, limit: Optional[int] = None):
        """
        Get existing doc ids present in vector database

        :param ids: list of doc ids to check for existence
-        :type ids: List[str]
+        :type ids: list[str]
        :param where: Optional. to filter data
-        :type where: Dict[str, Any]
+        :type where: dict[str, Any]
        :param limit: Optional. maximum number of documents
        :type limit: Optional[int]
        :return: Existing documents.
@@ -115,11 +115,11 @@ class ZillizVectorDB(BaseVectorDB):

    def add(
        self,
-        embeddings: List[List[float]],
-        documents: List[str],
-        metadatas: List[object],
-        ids: List[str],
-        **kwargs: Optional[Dict[str, any]],
+        embeddings: list[list[float]],
+        documents: list[str],
+        metadatas: list[object],
+        ids: list[str],
+        **kwargs: Optional[dict[str, any]],
    ):
        """Add to database"""
        embeddings = self.embedder.embedding_fn(documents)
@@ -134,17 +134,17 @@ class ZillizVectorDB(BaseVectorDB):

    def query(
        self,
-        input_query: List[str],
+        input_query: list[str],
        n_results: int,
-        where: Dict[str, any],
+        where: dict[str, any],
        citations: bool = False,
-        **kwargs: Optional[Dict[str, Any]],
-    ) -> Union[List[Tuple[str, Dict]], List[str]]:
+        **kwargs: Optional[dict[str, Any]],
+    ) -> Union[list[tuple[str, dict]], list[str]]:
        """
        Query contents from vector database based on vector similarity

        :param input_query: list of query string
-        :type input_query: List[str]
+        :type input_query: list[str]
        :param n_results: no of similar documents to fetch from database
        :type n_results: int
        :param where: to filter data
@@ -154,7 +154,7 @@ class ZillizVectorDB(BaseVectorDB):
        :type citations: bool, default is False.
        :return: The content of the document that matched your query,
        along with url of the source and doc_id (if citations flag is true)
-        :rtype: List[str], if citations=False, otherwise List[Tuple[str, str, str]]
+        :rtype: list[str], if citations=False, otherwise list[tuple[str, str, str]]
        """

        if self.collection.is_empty:
@@ -200,7 +200,7 @@ class ZillizVectorDB(BaseVectorDB):
        """
        return self.collection.num_entities

-    def reset(self, collection_names: List[str] = None):
+    def reset(self, collection_names: list[str] = None):
        """
        Resets the database. Deletes all embeddings irreversibly.
        """