diff --git a/embedchain/llm/base.py b/embedchain/llm/base.py index 8013ed2c..2a1819c5 100644 --- a/embedchain/llm/base.py +++ b/embedchain/llm/base.py @@ -129,8 +129,12 @@ class BaseLlm(JSONSerializable): :return: Search results :rtype: Unknown """ - from langchain.tools import DuckDuckGoSearchRun - + try: + from langchain.tools import DuckDuckGoSearchRun + except ImportError: + raise ImportError( + 'Searching requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`' + ) from None search = DuckDuckGoSearchRun() logging.info(f"Access search to get answers for {input_query}") return search.run(input_query) diff --git a/embedchain/loaders/docs_site_loader.py b/embedchain/loaders/docs_site_loader.py index 5f8ed164..2b880722 100644 --- a/embedchain/loaders/docs_site_loader.py +++ b/embedchain/loaders/docs_site_loader.py @@ -3,7 +3,14 @@ import logging from urllib.parse import urljoin, urlparse import requests -from bs4 import BeautifulSoup + +try: + from bs4 import BeautifulSoup +except ImportError: + raise ImportError( + 'DocsSite requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`' + ) from None + from embedchain.helper.json_serializable import register_deserializable from embedchain.loaders.base_loader import BaseLoader diff --git a/embedchain/loaders/docx_file.py b/embedchain/loaders/docx_file.py index 7dc2ce4a..59846c05 100644 --- a/embedchain/loaders/docx_file.py +++ b/embedchain/loaders/docx_file.py @@ -1,7 +1,11 @@ import hashlib -from langchain.document_loaders import Docx2txtLoader - +try: + from langchain.document_loaders import Docx2txtLoader +except ImportError: + raise ImportError( + 'Docx file requires extra dependencies. 
Install with `pip install --upgrade "embedchain[dataloaders]"`' + ) from None from embedchain.helper.json_serializable import register_deserializable from embedchain.loaders.base_loader import BaseLoader diff --git a/embedchain/loaders/pdf_file.py b/embedchain/loaders/pdf_file.py index b5431d31..6b03554b 100644 --- a/embedchain/loaders/pdf_file.py +++ b/embedchain/loaders/pdf_file.py @@ -1,7 +1,11 @@ import hashlib -from langchain.document_loaders import PyPDFLoader - +try: + from langchain.document_loaders import PyPDFLoader +except ImportError: + raise ImportError( + 'PDF File requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`' + ) from None from embedchain.helper.json_serializable import register_deserializable from embedchain.loaders.base_loader import BaseLoader from embedchain.utils import clean_string diff --git a/embedchain/loaders/sitemap.py b/embedchain/loaders/sitemap.py index fb3657fa..3196027b 100644 --- a/embedchain/loaders/sitemap.py +++ b/embedchain/loaders/sitemap.py @@ -2,8 +2,14 @@ import hashlib import logging import requests -from bs4 import BeautifulSoup -from bs4.builder import ParserRejectedMarkup + +try: + from bs4 import BeautifulSoup + from bs4.builder import ParserRejectedMarkup +except ImportError: + raise ImportError( + 'Sitemap requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`' + ) from None from embedchain.helper.json_serializable import register_deserializable from embedchain.loaders.base_loader import BaseLoader diff --git a/embedchain/loaders/web_page.py b/embedchain/loaders/web_page.py index bf0d2416..98109d07 100644 --- a/embedchain/loaders/web_page.py +++ b/embedchain/loaders/web_page.py @@ -2,7 +2,13 @@ import hashlib import logging import requests -from bs4 import BeautifulSoup + +try: + from bs4 import BeautifulSoup +except ImportError: + raise ImportError( + 'Webpage requires extra dependencies. 
Install with `pip install --upgrade "embedchain[dataloaders]"`' + ) from None from embedchain.helper.json_serializable import register_deserializable from embedchain.loaders.base_loader import BaseLoader diff --git a/embedchain/loaders/xml.py b/embedchain/loaders/xml.py index 324a8c71..d200ffb2 100644 --- a/embedchain/loaders/xml.py +++ b/embedchain/loaders/xml.py @@ -1,7 +1,11 @@ import hashlib -from langchain.document_loaders import UnstructuredXMLLoader - +try: + from langchain.document_loaders import UnstructuredXMLLoader +except ImportError: + raise ImportError( + 'XML file requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`' + ) from None from embedchain.helper.json_serializable import register_deserializable from embedchain.loaders.base_loader import BaseLoader from embedchain.utils import clean_string diff --git a/embedchain/loaders/youtube_video.py b/embedchain/loaders/youtube_video.py index 9b3ca30d..73cabcc2 100644 --- a/embedchain/loaders/youtube_video.py +++ b/embedchain/loaders/youtube_video.py @@ -1,7 +1,11 @@ import hashlib -from langchain.document_loaders import YoutubeLoader - +try: + from langchain.document_loaders import YoutubeLoader +except ImportError: + raise ImportError( + 'YouTube video requires extra dependencies. 
Install with `pip install --upgrade "embedchain[dataloaders]"`' + ) from None from embedchain.helper.json_serializable import register_deserializable from embedchain.loaders.base_loader import BaseLoader from embedchain.utils import clean_string diff --git a/pyproject.toml b/pyproject.toml index 2d9898fa..268b6d0b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -90,13 +90,13 @@ python-dotenv = "^1.0.0" langchain = "^0.0.279" requests = "^2.31.0" openai = "^0.27.5" -tiktoken = "^0.4.0" +tiktoken = { version="^0.4.0", optional=true } chromadb ="^0.4.8" -youtube-transcript-api = "^0.6.1" -beautifulsoup4 = "^4.12.2" -pypdf = "^3.11.0" -pytube = "^15.0.0" -duckduckgo-search = "^3.8.5" +youtube-transcript-api = { version="^0.6.1", optional=true } +beautifulsoup4 = { version="^4.12.2", optional=true } +pypdf = { version="^3.11.0", optional=true } +pytube = { version="^15.0.0", optional=true } +duckduckgo-search = { version="^3.8.5", optional=true } llama-hub = { version = "^0.0.29", optional = true } sentence-transformers = { version = "^2.2.2", optional = true } torch = { version = "2.0.0", optional = true } @@ -111,15 +111,15 @@ fastapi-poe = { version = "0.0.16", optional = true } discord = { version = "^2.3.2", optional = true } slack-sdk = { version = "3.21.3", optional = true } cohere = { version = "^4.27", optional= true } -docx2txt = "^0.8" -pinecone-client = "^2.2.4" -unstructured = {extras = ["local-inference"], version = "^0.10.18"} +docx2txt = { version="^0.8", optional=true } +pinecone-client = { version = "^2.2.4", optional = true } +unstructured = {extras = ["local-inference"], version = "^0.10.18", optional=true} pillow = { version = "10.0.1", optional = true } torchvision = { version = ">=0.15.1, !=0.15.2", optional = true } ftfy = { version = "6.1.1", optional = true } regex = { version = "2023.8.8", optional = true } huggingface_hub = { version = "^0.17.3", optional = true } -pymilvus = "2.3.1" +pymilvus = { version="2.3.1", optional = true } 
[tool.poetry.group.dev.dependencies] black = "^23.3.0" @@ -149,6 +149,16 @@ images = ["torch", "ftfy", "regex", "pillow", "torchvision"] huggingface_hub=["huggingface_hub"] cohere = ["cohere"] milvus = ["pymilvus"] +dataloaders=[ + "youtube-transcript-api", + "beautifulsoup4", + "pypdf", + "pytube", + "duckduckgo-search", + "docx2txt", + "unstructured", + "sentence-transformers", +] [tool.poetry.group.docs.dependencies]