refactor: Use src instead of url as argument value (#111)
This commit is contained in:
@@ -5,16 +5,24 @@ class BaseChunker:
|
||||
def __init__(self, text_splitter):
|
||||
self.text_splitter = text_splitter
|
||||
|
||||
def create_chunks(self, loader, url):
|
||||
def create_chunks(self, loader, src):
|
||||
"""
|
||||
Loads data and chunks it.
|
||||
|
||||
:param loader: The loader whose `load_data` method is used to create the raw data.
|
||||
:param src: The data to be handled by the loader. Can be a URL for remote sources or local content for local loaders.
|
||||
"""
|
||||
documents = []
|
||||
ids = []
|
||||
datas = loader.load_data(url)
|
||||
datas = loader.load_data(src)
|
||||
metadatas = []
|
||||
for data in datas:
|
||||
content = data["content"]
|
||||
meta_data = data["meta_data"]
|
||||
chunks = self.text_splitter.split_text(content)
|
||||
url = meta_data["url"]
|
||||
|
||||
chunks = self.text_splitter.split_text(content)
|
||||
|
||||
for chunk in chunks:
|
||||
chunk_id = hashlib.sha256((chunk + url).encode()).hexdigest()
|
||||
ids.append(chunk_id)
|
||||
|
||||
Reference in New Issue
Block a user