docs: update docstrings (#565)
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
from embedchain.chunkers.base_chunker import BaseChunker
|
||||
from embedchain.chunkers.docs_site import DocsSiteChunker
|
||||
from embedchain.chunkers.docx_file import DocxFileChunker
|
||||
from embedchain.chunkers.notion import NotionChunker
|
||||
@@ -8,7 +9,9 @@ from embedchain.chunkers.text import TextChunker
|
||||
from embedchain.chunkers.web_page import WebPageChunker
|
||||
from embedchain.chunkers.youtube_video import YoutubeVideoChunker
|
||||
from embedchain.config import AddConfig
|
||||
from embedchain.config.AddConfig import ChunkerConfig, LoaderConfig
|
||||
from embedchain.helper_classes.json_serializable import JSONSerializable
|
||||
from embedchain.loaders.base_loader import BaseLoader
|
||||
from embedchain.loaders.csv import CsvLoader
|
||||
from embedchain.loaders.docs_site_loader import DocsSiteLoader
|
||||
from embedchain.loaders.docx_file import DocxFileLoader
|
||||
@@ -29,16 +32,28 @@ class DataFormatter(JSONSerializable):
|
||||
"""
|
||||
|
||||
def __init__(self, data_type: DataType, config: AddConfig):
|
||||
self.loader = self._get_loader(data_type, config.loader)
|
||||
self.chunker = self._get_chunker(data_type, config.chunker)
|
||||
"""
|
||||
Initialize a data formatter, setting the loader and chunker based on the data type.
|
||||
|
||||
def _get_loader(self, data_type: DataType, config):
|
||||
:param data_type: The type of the data to load and chunk.
|
||||
:type data_type: DataType
|
||||
:param config: AddConfig instance with nested loader and chunker config attributes.
|
||||
:type config: AddConfig
|
||||
"""
|
||||
self.loader = self._get_loader(data_type=data_type, config=config.loader)
|
||||
self.chunker = self._get_chunker(data_type=data_type, config=config.chunker)
|
||||
|
||||
def _get_loader(self, data_type: DataType, config: LoaderConfig) -> BaseLoader:
|
||||
"""
|
||||
Returns the appropriate data loader for the given data type.
|
||||
|
||||
:param data_type: The type of the data to load.
|
||||
:return: The loader for the given data type.
|
||||
:type data_type: DataType
|
||||
:param config: Config to initialize the loader with.
|
||||
:type config: LoaderConfig
|
||||
:raises ValueError: If an unsupported data type is provided.
|
||||
:return: The loader for the given data type.
|
||||
:rtype: BaseLoader
|
||||
"""
|
||||
loaders = {
|
||||
DataType.YOUTUBE_VIDEO: YoutubeVideoLoader,
|
||||
@@ -53,8 +68,8 @@ class DataFormatter(JSONSerializable):
|
||||
}
|
||||
lazy_loaders = {DataType.NOTION}
|
||||
if data_type in loaders:
|
||||
loader_class = loaders[data_type]
|
||||
loader = loader_class()
|
||||
loader_class: type = loaders[data_type]
|
||||
loader: BaseLoader = loader_class()
|
||||
return loader
|
||||
elif data_type in lazy_loaders:
|
||||
if data_type == DataType.NOTION:
|
||||
@@ -66,13 +81,16 @@ class DataFormatter(JSONSerializable):
|
||||
else:
|
||||
raise ValueError(f"Unsupported data type: {data_type}")
|
||||
|
||||
def _get_chunker(self, data_type: DataType, config):
|
||||
"""
|
||||
Returns the appropriate chunker for the given data type.
|
||||
def _get_chunker(self, data_type: DataType, config: ChunkerConfig) -> BaseChunker:
|
||||
"""Returns the appropriate chunker for the given data type.
|
||||
|
||||
:param data_type: The type of the data to chunk.
|
||||
:return: The chunker for the given data type.
|
||||
:type data_type: DataType
|
||||
:param config: Config to initialize the chunker with.
|
||||
:type config: ChunkerConfig
|
||||
:raises ValueError: If an unsupported data type is provided.
|
||||
:return: The chunker for the given data type.
|
||||
:rtype: BaseChunker
|
||||
"""
|
||||
chunker_classes = {
|
||||
DataType.YOUTUBE_VIDEO: YoutubeVideoChunker,
|
||||
@@ -87,8 +105,8 @@ class DataFormatter(JSONSerializable):
|
||||
DataType.CSV: TableChunker,
|
||||
}
|
||||
if data_type in chunker_classes:
|
||||
chunker_class = chunker_classes[data_type]
|
||||
chunker = chunker_class(config)
|
||||
chunker_class: type = chunker_classes[data_type]
|
||||
chunker: BaseChunker = chunker_class(config)
|
||||
chunker.set_data_type(data_type)
|
||||
return chunker
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user