docs: update docstrings (#565)

This commit is contained in:
cachho
2023-09-07 02:04:44 +02:00
committed by GitHub
parent 4754372fcd
commit 1ac8aef4de
25 changed files with 736 additions and 298 deletions

View File

@@ -1,3 +1,4 @@
from embedchain.chunkers.base_chunker import BaseChunker
from embedchain.chunkers.docs_site import DocsSiteChunker
from embedchain.chunkers.docx_file import DocxFileChunker
from embedchain.chunkers.notion import NotionChunker
@@ -8,7 +9,9 @@ from embedchain.chunkers.text import TextChunker
from embedchain.chunkers.web_page import WebPageChunker
from embedchain.chunkers.youtube_video import YoutubeVideoChunker
from embedchain.config import AddConfig
from embedchain.config.AddConfig import ChunkerConfig, LoaderConfig
from embedchain.helper_classes.json_serializable import JSONSerializable
from embedchain.loaders.base_loader import BaseLoader
from embedchain.loaders.csv import CsvLoader
from embedchain.loaders.docs_site_loader import DocsSiteLoader
from embedchain.loaders.docx_file import DocxFileLoader
@@ -29,16 +32,28 @@ class DataFormatter(JSONSerializable):
"""
def __init__(self, data_type: DataType, config: AddConfig):
self.loader = self._get_loader(data_type, config.loader)
self.chunker = self._get_chunker(data_type, config.chunker)
"""
Initialize a DataFormatter and select the loader and chunker based on the data type.
def _get_loader(self, data_type: DataType, config):
:param data_type: The type of the data to load and chunk.
:type data_type: DataType
:param config: AddConfig instance with nested loader and chunker config attributes.
:type config: AddConfig
"""
self.loader = self._get_loader(data_type=data_type, config=config.loader)
self.chunker = self._get_chunker(data_type=data_type, config=config.chunker)
def _get_loader(self, data_type: DataType, config: LoaderConfig) -> BaseLoader:
"""
Returns the appropriate data loader for the given data type.
:param data_type: The type of the data to load.
:return: The loader for the given data type.
:type data_type: DataType
:param config: Config to initialize the loader with.
:type config: LoaderConfig
:raises ValueError: If an unsupported data type is provided.
:return: The loader for the given data type.
:rtype: BaseLoader
"""
loaders = {
DataType.YOUTUBE_VIDEO: YoutubeVideoLoader,
@@ -53,8 +68,8 @@ class DataFormatter(JSONSerializable):
}
lazy_loaders = {DataType.NOTION}
if data_type in loaders:
loader_class = loaders[data_type]
loader = loader_class()
loader_class: type = loaders[data_type]
loader: BaseLoader = loader_class()
return loader
elif data_type in lazy_loaders:
if data_type == DataType.NOTION:
@@ -66,13 +81,16 @@ class DataFormatter(JSONSerializable):
else:
raise ValueError(f"Unsupported data type: {data_type}")
def _get_chunker(self, data_type: DataType, config):
"""
Returns the appropriate chunker for the given data type.
def _get_chunker(self, data_type: DataType, config: ChunkerConfig) -> BaseChunker:
"""Returns the appropriate chunker for the given data type.
:param data_type: The type of the data to chunk.
:return: The chunker for the given data type.
:type data_type: DataType
:param config: Config to initialize the chunker with.
:type config: ChunkerConfig
:raises ValueError: If an unsupported data type is provided.
:return: The chunker for the given data type.
:rtype: BaseChunker
"""
chunker_classes = {
DataType.YOUTUBE_VIDEO: YoutubeVideoChunker,
@@ -87,8 +105,8 @@ class DataFormatter(JSONSerializable):
DataType.CSV: TableChunker,
}
if data_type in chunker_classes:
chunker_class = chunker_classes[data_type]
chunker = chunker_class(config)
chunker_class: type = chunker_classes[data_type]
chunker: BaseChunker = chunker_class(config)
chunker.set_data_type(data_type)
return chunker
else: