refactor: Use src instead of url as argument value (#111)

This commit is contained in:
cachho
2023-07-07 12:44:44 +02:00
committed by GitHub
parent 5ae2a36305
commit 51adc5c886
2 changed files with 17 additions and 9 deletions

View File

@@ -5,16 +5,24 @@ class BaseChunker:
def __init__(self, text_splitter):
"""
Stores the text splitter on the instance.
:param text_splitter: The splitter whose `split_text` method is called by `create_chunks` to split loaded content into chunks.
"""
self.text_splitter = text_splitter
def create_chunks(self, loader, url):
def create_chunks(self, loader, src):
"""
Loads data and chunks it.
:param loader: The loader whose `load_data` method is used to create the raw data.
:param src: The data to be handled by the loader. Can be a URL for remote sources or local content for local loaders.
"""
documents = []
ids = []
datas = loader.load_data(url)
datas = loader.load_data(src)
metadatas = []
for data in datas:
content = data["content"]
meta_data = data["meta_data"]
chunks = self.text_splitter.split_text(content)
url = meta_data["url"]
chunks = self.text_splitter.split_text(content)
for chunk in chunks:
chunk_id = hashlib.sha256((chunk + url).encode()).hexdigest()
ids.append(chunk_id)