fix: url metadata for all datatypes (#613)

This commit is contained in:
cachho
2023-09-13 19:19:48 +02:00
committed by GitHub
parent 701d0b21ef
commit 79efa51941
4 changed files with 116 additions and 16 deletions

View File

@@ -1,15 +1,47 @@
from enum import Enum
class DirectDataType(Enum):
    """Data types that contain raw data directly."""

    TEXT = "text"
class IndirectDataType(Enum):
    """Data types that contain references to data stored elsewhere."""

    # TEXT and QNA_PAIR are deliberately not listed here: DataType takes
    # them from DirectDataType and SpecialDataType respectively.
    YOUTUBE_VIDEO = "youtube_video"
    PDF_FILE = "pdf_file"
    WEB_PAGE = "web_page"
    SITEMAP = "sitemap"
    DOCX = "docx"
    DOCS_SITE = "docs_site"
    NOTION = "notion"
    CSV = "csv"
    MDX = "mdx"
class SpecialDataType(Enum):
    """
    Data types that are neither direct nor indirect, or that simply
    require special attention.
    """

    QNA_PAIR = "qna_pair"
class DataType(Enum):
    """
    Flat enumeration of every data type.

    Each member reuses the value of the same-named member of
    DirectDataType, IndirectDataType or SpecialDataType, so the string
    values stay identical across the enums.
    """

    # Direct: the raw data itself.
    TEXT = DirectDataType.TEXT.value

    # Indirect: references to data stored elsewhere.
    YOUTUBE_VIDEO = IndirectDataType.YOUTUBE_VIDEO.value
    PDF_FILE = IndirectDataType.PDF_FILE.value
    WEB_PAGE = IndirectDataType.WEB_PAGE.value
    SITEMAP = IndirectDataType.SITEMAP.value
    DOCX = IndirectDataType.DOCX.value
    DOCS_SITE = IndirectDataType.DOCS_SITE.value
    NOTION = IndirectDataType.NOTION.value
    CSV = IndirectDataType.CSV.value
    MDX = IndirectDataType.MDX.value

    # Special: neither direct nor indirect.
    QNA_PAIR = SpecialDataType.QNA_PAIR.value