Remove unwanted libraries and lighten package (#1391)

This commit is contained in:
Dev Khant
2024-07-09 04:30:16 +05:30
committed by GitHub
parent ebbf90f4aa
commit 1a5d0d236a
28 changed files with 235 additions and 1541 deletions

View File

@@ -9,9 +9,13 @@ PROJECT_NAME := embedchain
install: install:
poetry install poetry install
# TODO: use a more efficient way to install these packages
install_all: install_all:
poetry install --all-extras poetry install --all-extras
poetry run pip install pinecone-text pinecone-client langchain-anthropic "unstructured[local-inference, all-docs]" ollama langchain_together==0.1.3 langchain_cohere==0.1.5 deepgram-sdk==3.2.7 langchain-huggingface psutil poetry run pip install pinecone-text pinecone-client langchain-anthropic "unstructured[local-inference, all-docs]" ollama langchain_together==0.1.3 \
langchain_cohere==0.1.5 deepgram-sdk==3.2.7 langchain-huggingface psutil clarifai==10.0.1 flask==2.3.3 twilio==8.5.0 fastapi-poe==0.0.16 discord==2.3.2 \
slack-sdk==3.21.3 huggingface_hub==0.23.0 gitpython==3.1.38 yt_dlp==2023.11.14 PyGithub==1.59.1 feedparser==6.0.10 newspaper3k==0.2.8 listparser==0.19 \
modal==0.56.4329 dropbox==11.36.2 boto3==1.34.20 youtube-transcript-api==0.6.1 pytube==15.0.0 beautifulsoup4==4.12.3
install_es: install_es:
poetry install --extras elasticsearch poetry install --extras elasticsearch

View File

@@ -7,7 +7,7 @@ title: '🔮 Poe Bot'
1. Install embedchain python package: 1. Install embedchain python package:
```bash ```bash
pip install --upgrade "embedchain[poe]" pip install fastapi-poe==0.0.16
``` ```
2. Create a free account on [Poe](https://www.poe.com?utm_source=embedchain). 2. Create a free account on [Poe](https://www.poe.com?utm_source=embedchain).

View File

@@ -12,8 +12,7 @@ try:
from discord.ext import commands from discord.ext import commands
except ModuleNotFoundError: except ModuleNotFoundError:
raise ModuleNotFoundError( raise ModuleNotFoundError(
"The required dependencies for Discord are not installed." "The required dependencies for Discord are not installed." "Please install with `pip install discord==2.3.2`"
'Please install with `pip install "embedchain[discord]"`'
) from None ) from None

View File

@@ -11,7 +11,7 @@ try:
from fastapi_poe import PoeBot, run from fastapi_poe import PoeBot, run
except ModuleNotFoundError: except ModuleNotFoundError:
raise ModuleNotFoundError( raise ModuleNotFoundError(
"The required dependencies for Poe are not installed." 'Please install with `pip install "embedchain[poe]"`' "The required dependencies for Poe are not installed." "Please install with `pip install fastapi-poe==0.0.16`"
) from None ) from None

View File

@@ -15,7 +15,7 @@ try:
except ModuleNotFoundError: except ModuleNotFoundError:
raise ModuleNotFoundError( raise ModuleNotFoundError(
"The required dependencies for Slack are not installed." "The required dependencies for Slack are not installed."
'Please install with `pip install --upgrade "embedchain[slack]"`' "Please install with `pip install slack-sdk==3.21.3 flask==2.3.3`"
) from None ) from None

View File

@@ -20,7 +20,7 @@ class WhatsAppBot(BaseBot):
except ModuleNotFoundError: except ModuleNotFoundError:
raise ModuleNotFoundError( raise ModuleNotFoundError(
"The required dependencies for WhatsApp are not installed. " "The required dependencies for WhatsApp are not installed. "
'Please install with `pip install --upgrade "embedchain[whatsapp]"`' "Please install with `pip install twilio==8.5.0 flask==2.3.3`"
) from None ) from None
super().__init__() super().__init__()

View File

@@ -8,7 +8,18 @@ load_dotenv(".env")
image = Image.debian_slim().pip_install( image = Image.debian_slim().pip_install(
"embedchain", "embedchain",
"embedchain[dataloaders]", "langchain_community==0.2.6",
"youtube-transcript-api==0.6.1",
"pytube==15.0.0",
"beautifulsoup4==4.12.3",
"slack-sdk==3.21.3",
"huggingface_hub==0.23.0",
"gitpython==3.1.38",
"yt_dlp==2023.11.14",
"PyGithub==1.59.1",
"feedparser==6.0.10",
"newspaper3k==0.2.8",
"listparser==0.19",
) )
stub = Stub( stub = Stub(

View File

@@ -23,7 +23,7 @@ class AWSBedrockLlm(BaseLlm):
except ModuleNotFoundError: except ModuleNotFoundError:
raise ModuleNotFoundError( raise ModuleNotFoundError(
"The required dependencies for AWSBedrock are not installed." "The required dependencies for AWSBedrock are not installed."
'Please install with `pip install --upgrade "embedchain[aws-bedrock]"`' "Please install with `pip install boto3==1.34.20`"
) from None ) from None
self.boto_client = boto3.client("bedrock-runtime", "us-west-2" or os.environ.get("AWS_REGION")) self.boto_client = boto3.client("bedrock-runtime", "us-west-2" or os.environ.get("AWS_REGION"))
@@ -38,8 +38,7 @@ class AWSBedrockLlm(BaseLlm):
} }
if config.stream: if config.stream:
from langchain.callbacks.streaming_stdout import \ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
StreamingStdOutCallbackHandler
callbacks = [StreamingStdOutCallbackHandler()] callbacks = [StreamingStdOutCallbackHandler()]
llm = Bedrock(**kwargs, streaming=config.stream, callbacks=callbacks) llm = Bedrock(**kwargs, streaming=config.stream, callbacks=callbacks)

View File

@@ -180,7 +180,7 @@ class BaseLlm(JSONSerializable):
from langchain.tools import DuckDuckGoSearchRun from langchain.tools import DuckDuckGoSearchRun
except ImportError: except ImportError:
raise ImportError( raise ImportError(
'Searching requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`' "Searching requires extra dependencies. Install with `pip install duckduckgo-search==6.1.5`"
) from None ) from None
search = DuckDuckGoSearchRun() search = DuckDuckGoSearchRun()
logger.info(f"Access search to get answers for {input_query}") logger.info(f"Access search to get answers for {input_query}")

View File

@@ -24,7 +24,7 @@ class ClarifaiLlm(BaseLlm):
except ModuleNotFoundError: except ModuleNotFoundError:
raise ModuleNotFoundError( raise ModuleNotFoundError(
"The required dependencies for Clarifai are not installed." "The required dependencies for Clarifai are not installed."
'Please install with `pip install --upgrade "embedchain[clarifai]"`' "Please install with `pip install clarifai==10.0.1`"
) from None ) from None
model_name = config.model model_name = config.model

View File

@@ -22,7 +22,7 @@ class HuggingFaceLlm(BaseLlm):
except ModuleNotFoundError: except ModuleNotFoundError:
raise ModuleNotFoundError( raise ModuleNotFoundError(
"The required dependencies for HuggingFaceHub are not installed." "The required dependencies for HuggingFaceHub are not installed."
'Please install with `pip install --upgrade "embedchain[huggingface-hub]"`' "Please install with `pip install huggingface-hub==0.23.0`"
) from None ) from None
super().__init__(config=config) super().__init__(config=config)

View File

@@ -24,7 +24,7 @@ class BeehiivLoader(BaseLoader):
from bs4.builder import ParserRejectedMarkup from bs4.builder import ParserRejectedMarkup
except ImportError: except ImportError:
raise ImportError( raise ImportError(
'Beehiiv requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`' "Beehiiv requires extra dependencies. Install with `pip install beautifulsoup4==4.12.3`"
) from None ) from None
if not url.endswith("sitemap.xml"): if not url.endswith("sitemap.xml"):

View File

@@ -8,7 +8,7 @@ try:
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
except ImportError: except ImportError:
raise ImportError( raise ImportError(
'DocsSite requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`' "DocsSite requires extra dependencies. Install with `pip install beautifulsoup4==4.12.3`"
) from None ) from None

View File

@@ -3,9 +3,7 @@ import hashlib
try: try:
from langchain_community.document_loaders import Docx2txtLoader from langchain_community.document_loaders import Docx2txtLoader
except ImportError: except ImportError:
raise ImportError( raise ImportError("Docx file requires extra dependencies. Install with `pip install docx2txt==0.8`") from None
'Docx file requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`'
) from None
from embedchain.helpers.json_serializable import register_deserializable from embedchain.helpers.json_serializable import register_deserializable
from embedchain.loaders.base_loader import BaseLoader from embedchain.loaders.base_loader import BaseLoader

View File

@@ -17,9 +17,7 @@ class DropboxLoader(BaseLoader):
try: try:
from dropbox import Dropbox, exceptions from dropbox import Dropbox, exceptions
except ImportError: except ImportError:
raise ImportError( raise ImportError("Dropbox requires extra dependencies. Install with `pip install dropbox==11.36.2`")
'Dropbox requires extra dependencies. Install with `pip install --upgrade "embedchain[dropbox]"`'
)
try: try:
dbx = Dropbox(access_token) dbx = Dropbox(access_token)

View File

@@ -30,7 +30,8 @@ class GithubLoader(BaseLoader):
from github import Github from github import Github
except ImportError as e: except ImportError as e:
raise ValueError( raise ValueError(
"GithubLoader requires extra dependencies. Install with `pip install --upgrade 'embedchain[github]'`" "GithubLoader requires extra dependencies. \
Install with `pip install gitpython==3.1.38 PyGithub==1.59.1`"
) from e ) from e
self.config = config self.config = config

View File

@@ -1,11 +1,6 @@
import hashlib import hashlib
try: from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFLoader
except ImportError:
raise ImportError(
'PDF File requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`'
) from None
from embedchain.helpers.json_serializable import register_deserializable from embedchain.helpers.json_serializable import register_deserializable
from embedchain.loaders.base_loader import BaseLoader from embedchain.loaders.base_loader import BaseLoader
from embedchain.utils.misc import clean_string from embedchain.utils.misc import clean_string

View File

@@ -28,12 +28,11 @@ class RSSFeedLoader(BaseLoader):
@staticmethod @staticmethod
def get_rss_content(url: str): def get_rss_content(url: str):
try: try:
from langchain_community.document_loaders import \ from langchain_community.document_loaders import RSSFeedLoader as LangchainRSSFeedLoader
RSSFeedLoader as LangchainRSSFeedLoader
except ImportError: except ImportError:
raise ImportError( raise ImportError(
"""RSSFeedLoader file requires extra dependencies. """RSSFeedLoader file requires extra dependencies.
Install with `pip install --upgrade "embedchain[rss_feed]"`""" Install with `pip install feedparser==6.0.10 newspaper3k==0.2.8 listparser==0.19`"""
) from None ) from None
output = [] output = []

View File

@@ -12,7 +12,7 @@ try:
from bs4.builder import ParserRejectedMarkup from bs4.builder import ParserRejectedMarkup
except ImportError: except ImportError:
raise ImportError( raise ImportError(
'Sitemap requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`' "Sitemap requires extra dependencies. Install with `pip install beautifulsoup4==4.12.3`"
) from None ) from None
from embedchain.helpers.json_serializable import register_deserializable from embedchain.helpers.json_serializable import register_deserializable

View File

@@ -24,7 +24,7 @@ class SubstackLoader(BaseLoader):
from bs4.builder import ParserRejectedMarkup from bs4.builder import ParserRejectedMarkup
except ImportError: except ImportError:
raise ImportError( raise ImportError(
'Substack requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`' "Substack requires extra dependencies. Install with `pip install beautifulsoup4==4.12.3`"
) from None ) from None
if not url.endswith("sitemap.xml"): if not url.endswith("sitemap.xml"):

View File

@@ -7,7 +7,7 @@ try:
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
except ImportError: except ImportError:
raise ImportError( raise ImportError(
'Webpage requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`' "Webpage requires extra dependencies. Install with `pip install beautifulsoup4==4.12.3`"
) from None ) from None
from embedchain.helpers.json_serializable import register_deserializable from embedchain.helpers.json_serializable import register_deserializable

View File

@@ -18,7 +18,7 @@ class YoutubeChannelLoader(BaseLoader):
import yt_dlp import yt_dlp
except ImportError as e: except ImportError as e:
raise ValueError( raise ValueError(
"YoutubeLoader requires extra dependencies. Install with `pip install --upgrade 'embedchain[youtube_channel]'`" # noqa: E501 "YoutubeChannelLoader requires extra dependencies. Install with `pip install yt_dlp==2023.11.14 youtube-transcript-api==0.6.1`" # noqa: E501
) from e ) from e
data = [] data = []

View File

@@ -5,14 +5,12 @@ import logging
try: try:
from youtube_transcript_api import YouTubeTranscriptApi from youtube_transcript_api import YouTubeTranscriptApi
except ImportError: except ImportError:
raise ImportError('YouTube video requires extra dependencies. Install with `pip install youtube-transcript-api "`') raise ImportError("YouTube video requires extra dependencies. Install with `pip install youtube-transcript-api`")
try: try:
from langchain_community.document_loaders import YoutubeLoader from langchain_community.document_loaders import YoutubeLoader
from langchain_community.document_loaders.youtube import _parse_video_id from langchain_community.document_loaders.youtube import _parse_video_id
except ImportError: except ImportError:
raise ImportError( raise ImportError("YouTube video requires extra dependencies. Install with `pip install pytube==15.0.0`") from None
'YouTube video requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`'
) from None
from embedchain.helpers.json_serializable import register_deserializable from embedchain.helpers.json_serializable import register_deserializable
from embedchain.loaders.base_loader import BaseLoader from embedchain.loaders.base_loader import BaseLoader
from embedchain.utils.misc import clean_string from embedchain.utils.misc import clean_string

View File

@@ -1,2 +1,12 @@
flask==2.3.2 flask==2.3.2
embedchain[dataloaders]==0.0.78 youtube-transcript-api==0.6.1
pytube==15.0.0
beautifulsoup4==4.12.3
slack-sdk==3.21.3
huggingface_hub==0.23.0
gitpython==3.1.38
yt_dlp==2023.11.14
PyGithub==1.59.1
feedparser==6.0.10
newspaper3k==0.2.8
listparser==0.19

View File

@@ -1,6 +1,24 @@
fastapi==0.104.0 fastapi==0.104.0
uvicorn==0.23.2 uvicorn==0.23.2
streamlit==1.29.0
embedchain==0.1.3 embedchain==0.1.3
embedchain[streamlit, community, opensource, elasticsearch, opensearch, poe, discord, slack, whatsapp, weaviate, pinecone, qdrant, images, huggingface_hub, cohere, together, milvus, dataloaders, vertexai, llama2, gmail, json]==0.1.3 slack-sdk==3.21.3
flask==2.3.3
fastapi-poe==0.0.16
discord==2.3.2
twilio==8.5.0
huggingface-hub==0.23.0
embedchain[community, opensource, elasticsearch, opensearch, weaviate, pinecone, qdrant, images, cohere, together, milvus, vertexai, llama2, gmail, json]==0.1.3
sqlalchemy==2.0.22 sqlalchemy==2.0.22
python-multipart==0.0.6 python-multipart==0.0.6
youtube-transcript-api==0.6.1
pytube==15.0.0
beautifulsoup4==4.12.3
slack-sdk==3.21.3
huggingface_hub==0.23.0
gitpython==3.1.38
yt_dlp==2023.11.14
PyGithub==1.59.1
feedparser==6.0.10
newspaper3k==0.2.8
listparser==0.19

View File

@@ -1 +1,3 @@
embedchain[slack, poe]==0.1.7 slack-sdk==3.21.3
flask==2.3.3
fastapi-poe==0.0.16

1612
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -105,9 +105,6 @@ gptcache = "^0.1.43"
pysbd = "^0.3.4" pysbd = "^0.3.4"
memzero = "^0.0.7" memzero = "^0.0.7"
tiktoken = { version = "^0.7.0", optional = true } tiktoken = { version = "^0.7.0", optional = true }
youtube-transcript-api = { version = "^0.6.1", optional = true }
pytube = { version = "^15.0.0", optional = true }
duckduckgo-search = { version = "^6.1.5", optional = true }
sentence-transformers = { version = "^2.2.2", optional = true } sentence-transformers = { version = "^2.2.2", optional = true }
torch = { version = "2.3.0", optional = true } torch = { version = "2.3.0", optional = true }
# Torch 2.0.1 is not compatible with poetry (https://github.com/pytorch/pytorch/issues/100974) # Torch 2.0.1 is not compatible with poetry (https://github.com/pytorch/pytorch/issues/100974)
@@ -115,19 +112,11 @@ gpt4all = { version = "2.0.2", optional = true }
# 1.0.9 is not working for some users (https://github.com/nomic-ai/gpt4all/issues/1394) # 1.0.9 is not working for some users (https://github.com/nomic-ai/gpt4all/issues/1394)
opensearch-py = { version = "2.3.1", optional = true } opensearch-py = { version = "2.3.1", optional = true }
elasticsearch = { version = "^8.9.0", optional = true } elasticsearch = { version = "^8.9.0", optional = true }
flask = { version = "^2.3.3", optional = true }
twilio = { version = "^8.5.0", optional = true }
fastapi-poe = { version = "0.0.16", optional = true }
discord = { version = "^2.3.2", optional = true }
slack-sdk = { version = "3.21.3", optional = true }
clarifai = { version = "^10.0.1", optional = true }
cohere = { version = "^5.3", optional = true } cohere = { version = "^5.3", optional = true }
together = { version = "^0.2.8", optional = true } together = { version = "^0.2.8", optional = true }
lancedb = { version = "^0.6.2", optional = true } lancedb = { version = "^0.6.2", optional = true }
weaviate-client = { version = "^3.24.1", optional = true } weaviate-client = { version = "^3.24.1", optional = true }
docx2txt = { version = "^0.8", optional = true }
qdrant-client = { version = "^1.6.3", optional = true } qdrant-client = { version = "^1.6.3", optional = true }
huggingface_hub = { version = "^0.17.3", optional = true }
pymilvus = { version = "2.4.3", optional = true } pymilvus = { version = "2.4.3", optional = true }
google-cloud-aiplatform = { version = "^1.26.1", optional = true } google-cloud-aiplatform = { version = "^1.26.1", optional = true }
replicate = { version = "^0.15.4", optional = true } replicate = { version = "^0.15.4", optional = true }
@@ -136,21 +125,12 @@ psycopg = { version = "^3.1.12", optional = true }
psycopg-binary = { version = "^3.1.12", optional = true } psycopg-binary = { version = "^3.1.12", optional = true }
psycopg-pool = { version = "^3.1.8", optional = true } psycopg-pool = { version = "^3.1.8", optional = true }
mysql-connector-python = { version = "^8.1.0", optional = true } mysql-connector-python = { version = "^8.1.0", optional = true }
gitpython = { version = "^3.1.38", optional = true }
yt_dlp = { version = "^2023.11.14", optional = true }
PyGithub = { version = "^1.59.1", optional = true }
feedparser = { version = "^6.0.10", optional = true }
newspaper3k = { version = "^0.2.8", optional = true }
listparser = { version = "^0.19", optional = true }
google-generativeai = { version = "^0.3.0", optional = true } google-generativeai = { version = "^0.3.0", optional = true }
modal = { version = "^0.56.4329", optional = true }
dropbox = { version = "^11.36.2", optional = true }
google-api-python-client = { version = "^2.111.0", optional = true } google-api-python-client = { version = "^2.111.0", optional = true }
google-auth-oauthlib = { version = "^1.2.0", optional = true } google-auth-oauthlib = { version = "^1.2.0", optional = true }
google-auth = { version = "^2.25.2", optional = true } google-auth = { version = "^2.25.2", optional = true }
google-auth-httplib2 = { version = "^0.2.0", optional = true } google-auth-httplib2 = { version = "^0.2.0", optional = true }
google-api-core = { version = "^2.15.0", optional = true } google-api-core = { version = "^2.15.0", optional = true }
boto3 = { version = "^1.34.20", optional = true }
langchain-mistralai = { version = "^0.1.9", optional = true } langchain-mistralai = { version = "^0.1.9", optional = true }
langchain-openai = "^0.1.7" langchain-openai = "^0.1.7"
langchain-google-vertexai = { version = "^1.0.6", optional = true } langchain-google-vertexai = { version = "^1.0.6", optional = true }
@@ -174,27 +154,14 @@ mock = "^5.1.0"
pytest-asyncio = "^0.21.1" pytest-asyncio = "^0.21.1"
[tool.poetry.extras] [tool.poetry.extras]
streamlit = ["streamlit"]
opensource = ["sentence-transformers", "torch", "gpt4all"] opensource = ["sentence-transformers", "torch", "gpt4all"]
lancedb = ["lancedb"] lancedb = ["lancedb"]
elasticsearch = ["elasticsearch"] elasticsearch = ["elasticsearch"]
opensearch = ["opensearch-py"] opensearch = ["opensearch-py"]
poe = ["fastapi-poe"]
discord = ["discord"]
slack = ["slack-sdk", "flask"]
whatsapp = ["twilio", "flask"]
weaviate = ["weaviate-client"] weaviate = ["weaviate-client"]
qdrant = ["qdrant-client"] qdrant = ["qdrant-client"]
together = ["together"] together = ["together"]
huggingface_hub=["huggingface_hub"]
milvus = ["pymilvus"] milvus = ["pymilvus"]
dataloaders=[
"youtube-transcript-api",
"docx2txt",
"duckduckgo-search",
"pytube",
"sentence-transformers"
]
vertexai = ["langchain-google-vertexai"] vertexai = ["langchain-google-vertexai"]
llama2 = ["replicate"] llama2 = ["replicate"]
gmail = [ gmail = [
@@ -208,20 +175,7 @@ gmail = [
googledrive = ["google-api-python-client", "google-auth-oauthlib", "google-auth-httplib2"] googledrive = ["google-api-python-client", "google-auth-oauthlib", "google-auth-httplib2"]
postgres = ["psycopg", "psycopg-binary", "psycopg-pool"] postgres = ["psycopg", "psycopg-binary", "psycopg-pool"]
mysql = ["mysql-connector-python"] mysql = ["mysql-connector-python"]
github = ["PyGithub", "gitpython"]
youtube = [
"yt_dlp",
"youtube-transcript-api",
]
rss_feed = [
"feedparser",
"listparser",
"newspaper3k"
]
google = ["google-generativeai"] google = ["google-generativeai"]
modal = ["modal"]
dropbox = ["dropbox"]
aws_bedrock = ["boto3"]
mistralai = ["langchain-mistralai"] mistralai = ["langchain-mistralai"]
[tool.poetry.group.docs.dependencies] [tool.poetry.group.docs.dependencies]