Resolve conflicts (#208)

2023-07-10 21:50:05 -07:00
parent 6936d6983d
commit 9ca836520f
32 changed files with 396 additions and 207 deletions
--- a/embedchain/chunkers/base_chunker.py
+++ b/embedchain/chunkers/base_chunker.py
@@ -3,15 +3,17 @@ import hashlib

 class BaseChunker:
    def __init__(self, text_splitter):
-        ''' Initialize the chunker. '''
+        """Initialize the chunker."""
        self.text_splitter = text_splitter

    def create_chunks(self, loader, src):
        """
        Loads data and chunks it.

-        :param loader: The loader which's `load_data` method is used to create the raw data.
-        :param src: The data to be handled by the loader. Can be a URL for remote sources or local content for local loaders. 
+        :param loader: The loader whose `load_data` method is used to create
+        the raw data.
+        :param src: The data to be handled by the loader. Can be a URL for
+        remote sources or local content for local loaders.
        """
        documents = []
        ids = []
@@ -27,7 +29,7 @@ class BaseChunker:

            for chunk in chunks:
                chunk_id = hashlib.sha256((chunk + url).encode()).hexdigest()
-                if (idMap.get(chunk_id) is None):
+                if idMap.get(chunk_id) is None:
                    idMap[chunk_id] = True
                    ids.append(chunk_id)
                    documents.append(chunk)
--- a/embedchain/chunkers/docx_file.py
+++ b/embedchain/chunkers/docx_file.py
@@ -1,10 +1,9 @@
 from typing import Optional
-from embedchain.chunkers.base_chunker import BaseChunker
-from embedchain.config.AddConfig import ChunkerConfig

 from langchain.text_splitter import RecursiveCharacterTextSplitter

-
+from embedchain.chunkers.base_chunker import BaseChunker
+from embedchain.config.AddConfig import ChunkerConfig

 TEXT_SPLITTER_CHUNK_PARAMS = {
    "chunk_size": 1000,
@@ -14,7 +13,8 @@ TEXT_SPLITTER_CHUNK_PARAMS = {


 class DocxFileChunker(BaseChunker):
-    ''' Chunker for .docx file. '''
+    """Chunker for .docx file."""
+
    def __init__(self, config: Optional[ChunkerConfig] = None):
        if config is None:
            config = TEXT_SPLITTER_CHUNK_PARAMS
--- a/embedchain/chunkers/pdf_file.py
+++ b/embedchain/chunkers/pdf_file.py
@@ -1,9 +1,9 @@
 from typing import Optional
-from embedchain.chunkers.base_chunker import BaseChunker
-from embedchain.config.AddConfig import ChunkerConfig

 from langchain.text_splitter import RecursiveCharacterTextSplitter

+from embedchain.chunkers.base_chunker import BaseChunker
+from embedchain.config.AddConfig import ChunkerConfig

 TEXT_SPLITTER_CHUNK_PARAMS = {
    "chunk_size": 1000,
@@ -13,7 +13,8 @@ TEXT_SPLITTER_CHUNK_PARAMS = {


 class PdfFileChunker(BaseChunker):
-    ''' Chunker for PDF file. '''
+    """Chunker for PDF file."""
+
    def __init__(self, config: Optional[ChunkerConfig] = None):
        if config is None:
            config = TEXT_SPLITTER_CHUNK_PARAMS
--- a/embedchain/chunkers/qna_pair.py
+++ b/embedchain/chunkers/qna_pair.py
@@ -1,9 +1,9 @@
 from typing import Optional
-from embedchain.chunkers.base_chunker import BaseChunker
-from embedchain.config.AddConfig import ChunkerConfig

 from langchain.text_splitter import RecursiveCharacterTextSplitter

+from embedchain.chunkers.base_chunker import BaseChunker
+from embedchain.config.AddConfig import ChunkerConfig

 TEXT_SPLITTER_CHUNK_PARAMS = {
    "chunk_size": 300,
@@ -13,7 +13,8 @@ TEXT_SPLITTER_CHUNK_PARAMS = {


 class QnaPairChunker(BaseChunker):
-    ''' Chunker for QnA pair. '''
+    """Chunker for QnA pair."""
+
    def __init__(self, config: Optional[ChunkerConfig] = None):
        if config is None:
            config = TEXT_SPLITTER_CHUNK_PARAMS
--- a/embedchain/chunkers/text.py
+++ b/embedchain/chunkers/text.py
@@ -1,9 +1,9 @@
 from typing import Optional
-from embedchain.chunkers.base_chunker import BaseChunker
-from embedchain.config.AddConfig import ChunkerConfig

 from langchain.text_splitter import RecursiveCharacterTextSplitter

+from embedchain.chunkers.base_chunker import BaseChunker
+from embedchain.config.AddConfig import ChunkerConfig

 TEXT_SPLITTER_CHUNK_PARAMS = {
    "chunk_size": 300,
@@ -13,7 +13,8 @@ TEXT_SPLITTER_CHUNK_PARAMS = {


 class TextChunker(BaseChunker):
-    ''' Chunker for text. '''
+    """Chunker for text."""
+
    def __init__(self, config: Optional[ChunkerConfig] = None):
        if config is None:
            config = TEXT_SPLITTER_CHUNK_PARAMS
--- a/embedchain/chunkers/web_page.py
+++ b/embedchain/chunkers/web_page.py
@@ -1,9 +1,9 @@
 from typing import Optional
-from embedchain.chunkers.base_chunker import BaseChunker
-from embedchain.config.AddConfig import ChunkerConfig

 from langchain.text_splitter import RecursiveCharacterTextSplitter

+from embedchain.chunkers.base_chunker import BaseChunker
+from embedchain.config.AddConfig import ChunkerConfig

 TEXT_SPLITTER_CHUNK_PARAMS = {
    "chunk_size": 500,
@@ -13,7 +13,8 @@ TEXT_SPLITTER_CHUNK_PARAMS = {


 class WebPageChunker(BaseChunker):
-    ''' Chunker for web page. '''
+    """Chunker for web page."""
+
    def __init__(self, config: Optional[ChunkerConfig] = None):
        if config is None:
            config = TEXT_SPLITTER_CHUNK_PARAMS
--- a/embedchain/chunkers/youtube_video.py
+++ b/embedchain/chunkers/youtube_video.py
@@ -1,9 +1,9 @@
 from typing import Optional
-from embedchain.chunkers.base_chunker import BaseChunker
-from embedchain.config.AddConfig import ChunkerConfig

 from langchain.text_splitter import RecursiveCharacterTextSplitter

+from embedchain.chunkers.base_chunker import BaseChunker
+from embedchain.config.AddConfig import ChunkerConfig

 TEXT_SPLITTER_CHUNK_PARAMS = {
    "chunk_size": 2000,
@@ -13,7 +13,8 @@ TEXT_SPLITTER_CHUNK_PARAMS = {


 class YoutubeVideoChunker(BaseChunker):
-    ''' Chunker for Youtube video. '''
+    """Chunker for YouTube video."""
+
    def __init__(self, config: Optional[ChunkerConfig] = None):
        if config is None:
            config = TEXT_SPLITTER_CHUNK_PARAMS