diff --git a/embedchain/utils/misc.py b/embedchain/utils/misc.py index 88732bb8..0f117b32 100644 --- a/embedchain/utils/misc.py +++ b/embedchain/utils/misc.py @@ -201,10 +201,16 @@ def detect_datatype(source: Any) -> DataType: formatted_source = format_source(str(source), 30) if url: - from langchain.document_loaders.youtube import \ - ALLOWED_NETLOCK as YOUTUBE_ALLOWED_NETLOCS + YOUTUBE_ALLOWED_NETLOCKS = { + "www.youtube.com", + "m.youtube.com", + "youtu.be", + "youtube.com", + "vid.plus", + "www.youtube-nocookie.com", + } - if url.netloc in YOUTUBE_ALLOWED_NETLOCS: + if url.netloc in YOUTUBE_ALLOWED_NETLOCKS: logging.debug(f"Source of `{formatted_source}` detected as `youtube_video`.") return DataType.YOUTUBE_VIDEO