feat: AddConfig should allow configuring Chunker (#200)

This commit is contained in:
Anupam Singh
2023-07-11 04:23:56 +05:30
committed by GitHub
parent ae87dc4a6d
commit eda28cc491
10 changed files with 120 additions and 39 deletions

View File

@@ -1,3 +1,4 @@
from embedchain.config import AddConfig
from embedchain.loaders.youtube_video import YoutubeVideoLoader
from embedchain.loaders.pdf_file import PdfFileLoader
from embedchain.loaders.web_page import WebPageLoader
@@ -18,11 +19,11 @@ class DataFormatter:
loaders and chunkers to the data_type entered by the user in their
.add or .add_local method call
"""
def __init__(self, data_type):
self.loader = self._get_loader(data_type)
self.chunker = self._get_chunker(data_type)
def _get_loader(self, data_type):
def __init__(self, data_type: str, config: AddConfig):
self.loader = self._get_loader(data_type, config.loader)
self.chunker = self._get_chunker(data_type, config.chunker)
def _get_loader(self, data_type, config):
"""
Returns the appropriate data loader for the given data type.
@@ -43,7 +44,7 @@ class DataFormatter:
else:
raise ValueError(f"Unsupported data type: {data_type}")
def _get_chunker(self, data_type):
def _get_chunker(self, data_type, config):
"""
Returns the appropriate chunker for the given data type.
@@ -52,15 +53,14 @@ class DataFormatter:
:raises ValueError: If an unsupported data type is provided.
"""
chunkers = {
'youtube_video': YoutubeVideoChunker(),
'pdf_file': PdfFileChunker(),
'web_page': WebPageChunker(),
'qna_pair': QnaPairChunker(),
'text': TextChunker(),
'docx': DocxFileChunker(),
'youtube_video': YoutubeVideoChunker(config),
'pdf_file': PdfFileChunker(config),
'web_page': WebPageChunker(config),
'qna_pair': QnaPairChunker(config),
'text': TextChunker(config),
'docx': DocxFileChunker(config),
}
if data_type in chunkers:
return chunkers[data_type]
else:
raise ValueError(f"Unsupported data type: {data_type}")