Add support for image dataset (#571)

Co-authored-by: Rupesh Bansal <rupeshbansal@Shankars-MacBook-Air.local>
This commit is contained in:
Rupesh Bansal
2023-10-04 09:50:40 +05:30
committed by GitHub
parent 55e9a1cbd6
commit d0af018b8d
19 changed files with 498 additions and 31 deletions

View File

@@ -2,6 +2,7 @@ from embedchain.chunkers.base_chunker import BaseChunker
from embedchain.chunkers.docs_site import DocsSiteChunker
from embedchain.chunkers.docx_file import DocxFileChunker
from embedchain.chunkers.mdx import MdxChunker
from embedchain.chunkers.images import ImagesChunker
from embedchain.chunkers.notion import NotionChunker
from embedchain.chunkers.pdf_file import PdfFileChunker
from embedchain.chunkers.qna_pair import QnaPairChunker
@@ -16,6 +17,7 @@ from embedchain.loaders.base_loader import BaseLoader
from embedchain.loaders.csv import CsvLoader
from embedchain.loaders.docs_site_loader import DocsSiteLoader
from embedchain.loaders.docx_file import DocxFileLoader
from embedchain.loaders.images import ImagesLoader
from embedchain.loaders.local_qna_pair import LocalQnaPairLoader
from embedchain.loaders.local_text import LocalTextLoader
from embedchain.loaders.mdx import MdxLoader
@@ -68,6 +70,7 @@ class DataFormatter(JSONSerializable):
DataType.DOCS_SITE: DocsSiteLoader,
DataType.CSV: CsvLoader,
DataType.MDX: MdxLoader,
DataType.IMAGES: ImagesLoader,
}
lazy_loaders = {DataType.NOTION}
if data_type in loaders:
@@ -102,11 +105,11 @@ class DataFormatter(JSONSerializable):
DataType.QNA_PAIR: QnaPairChunker,
DataType.TEXT: TextChunker,
DataType.DOCX: DocxFileChunker,
DataType.WEB_PAGE: WebPageChunker,
DataType.DOCS_SITE: DocsSiteChunker,
DataType.NOTION: NotionChunker,
DataType.CSV: TableChunker,
DataType.MDX: MdxChunker,
DataType.IMAGES: ImagesChunker,
}
if data_type in chunker_classes:
chunker_class: type = chunker_classes[data_type]