[Feature] Add support for directory loader as data source (#1008)

This commit is contained in:
Sidharth Mohanty
2023-12-15 05:24:34 +05:30
committed by GitHub
parent d54cdc5b00
commit 9303a1bf81
5 changed files with 69 additions and 5 deletions

View File

@@ -196,8 +196,7 @@ def detect_datatype(source: Any) -> DataType:
formatted_source = format_source(str(source), 30)
if url:
-from langchain.document_loaders.youtube import \
-ALLOWED_NETLOCK as YOUTUBE_ALLOWED_NETLOCS
+from langchain.document_loaders.youtube import ALLOWED_NETLOCK as YOUTUBE_ALLOWED_NETLOCS
if url.netloc in YOUTUBE_ALLOWED_NETLOCS:
logging.debug(f"Source of `{formatted_source}` detected as `youtube_video`.")
@@ -303,6 +302,14 @@ def detect_datatype(source: Any) -> DataType:
logging.debug(f"Source of `{formatted_source}` detected as `mdx`.")
return DataType.MDX
if source.endswith(".txt"):
logging.debug(f"Source of `{formatted_source}` detected as `text`.")
return DataType.TEXT
if source.endswith(".pdf"):
logging.debug(f"Source of `{formatted_source}` detected as `pdf_file`.")
return DataType.PDF_FILE
if source.endswith(".yaml"):
with open(source, "r") as file:
yaml_content = yaml.safe_load(file)