feat: AddConfig should allow configuring Chunker (#200)
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
from embedchain.config import AddConfig
|
||||
from embedchain.loaders.youtube_video import YoutubeVideoLoader
|
||||
from embedchain.loaders.pdf_file import PdfFileLoader
|
||||
from embedchain.loaders.web_page import WebPageLoader
|
||||
@@ -18,11 +19,11 @@ class DataFormatter:
|
||||
loaders and chunkers to the data_type entered by the user in their
|
||||
.add or .add_local method call
|
||||
"""
|
||||
def __init__(self, data_type):
|
||||
self.loader = self._get_loader(data_type)
|
||||
self.chunker = self._get_chunker(data_type)
|
||||
|
||||
def _get_loader(self, data_type):
|
||||
def __init__(self, data_type: str, config: AddConfig):
|
||||
self.loader = self._get_loader(data_type, config.loader)
|
||||
self.chunker = self._get_chunker(data_type, config.chunker)
|
||||
|
||||
def _get_loader(self, data_type, config):
|
||||
"""
|
||||
Returns the appropriate data loader for the given data type.
|
||||
|
||||
@@ -43,7 +44,7 @@ class DataFormatter:
|
||||
else:
|
||||
raise ValueError(f"Unsupported data type: {data_type}")
|
||||
|
||||
def _get_chunker(self, data_type):
|
||||
def _get_chunker(self, data_type, config):
|
||||
"""
|
||||
Returns the appropriate chunker for the given data type.
|
||||
|
||||
@@ -52,15 +53,14 @@ class DataFormatter:
|
||||
:raises ValueError: If an unsupported data type is provided.
|
||||
"""
|
||||
chunkers = {
|
||||
'youtube_video': YoutubeVideoChunker(),
|
||||
'pdf_file': PdfFileChunker(),
|
||||
'web_page': WebPageChunker(),
|
||||
'qna_pair': QnaPairChunker(),
|
||||
'text': TextChunker(),
|
||||
'docx': DocxFileChunker(),
|
||||
'youtube_video': YoutubeVideoChunker(config),
|
||||
'pdf_file': PdfFileChunker(config),
|
||||
'web_page': WebPageChunker(config),
|
||||
'qna_pair': QnaPairChunker(config),
|
||||
'text': TextChunker(config),
|
||||
'docx': DocxFileChunker(config),
|
||||
}
|
||||
if data_type in chunkers:
|
||||
return chunkers[data_type]
|
||||
else:
|
||||
raise ValueError(f"Unsupported data type: {data_type}")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user