[Feature]: Unstructured File Loader Support - USF (#815)

This commit is contained in:
Muhammad Muzammil
2023-10-19 04:43:41 +05:00
committed by GitHub
parent c8846e0e93
commit 8b64deab40
5 changed files with 70 additions and 1 deletion

View File

@@ -6,6 +6,7 @@ from embedchain.chunkers.json import JSONChunker
from embedchain.chunkers.mdx import MdxChunker
from embedchain.chunkers.notion import NotionChunker
from embedchain.chunkers.pdf_file import PdfFileChunker
from embedchain.chunkers.unstructured_file import UnstructuredFileChunker
from embedchain.chunkers.qna_pair import QnaPairChunker
from embedchain.chunkers.sitemap import SitemapChunker
from embedchain.chunkers.table import TableChunker
@@ -30,6 +31,7 @@ from embedchain.loaders.sitemap import SitemapLoader
from embedchain.loaders.web_page import WebPageLoader
from embedchain.loaders.xml import XmlLoader
from embedchain.loaders.youtube_video import YoutubeVideoLoader
from embedchain.loaders.unstructured_file import UnstructuredLoader
from embedchain.models.data_type import DataType
@@ -77,6 +79,7 @@ class DataFormatter(JSONSerializable):
DataType.CSV: CsvLoader,
DataType.MDX: MdxLoader,
DataType.IMAGES: ImagesLoader,
DataType.UNSTRUCTURED: UnstructuredLoader,
DataType.JSON: JSONLoader,
}
lazy_loaders = {DataType.NOTION}
@@ -119,6 +122,7 @@ class DataFormatter(JSONSerializable):
DataType.MDX: MdxChunker,
DataType.IMAGES: ImagesChunker,
DataType.XML: XmlChunker,
DataType.UNSTRUCTURED: UnstructuredFileChunker,
DataType.JSON: JSONChunker,
}
if data_type in chunker_classes: