Resolve conflicts (#208)

This commit is contained in:
Deshraj Yadav
2023-07-10 21:50:05 -07:00
committed by GitHub
parent 6936d6983d
commit 9ca836520f
32 changed files with 396 additions and 207 deletions

View File

@@ -3,15 +3,17 @@ import hashlib
class BaseChunker:
def __init__(self, text_splitter):
''' Initialize the chunker. '''
"""Initialize the chunker."""
self.text_splitter = text_splitter
def create_chunks(self, loader, src):
"""
Loads data and chunks it.
:param loader: The loader which's `load_data` method is used to create the raw data.
:param src: The data to be handled by the loader. Can be a URL for remote sources or local content for local loaders.
:param loader: The loader whose `load_data` method is used to create
the raw data.
:param src: The data to be handled by the loader. Can be a URL for
remote sources or local content for local loaders.
"""
documents = []
ids = []
@@ -27,7 +29,7 @@ class BaseChunker:
for chunk in chunks:
chunk_id = hashlib.sha256((chunk + url).encode()).hexdigest()
if (idMap.get(chunk_id) is None):
if idMap.get(chunk_id) is None:
idMap[chunk_id] = True
ids.append(chunk_id)
documents.append(chunk)