[Bugfix] fix poetry lock (#960)

This commit is contained in:
Deven Patel
2023-11-16 13:30:38 -08:00
committed by GitHub
parent c93e49d2b8
commit 28460f725c
4 changed files with 29 additions and 28 deletions

View File

@@ -68,7 +68,7 @@ class GithubLoader(BaseLoader):
data.extend(results) data.extend(results)
data_urls.extend([result.get("meta_data").get("url") for result in results]) data_urls.extend([result.get("meta_data").get("url") for result in results])
except Exception as e: except Exception as e:
logging.error(f"Failed to process {file}: {e}") logging.warn(f"Failed to process {file}: {e}")
source_hash = hashlib.sha256(repo_url.encode()).hexdigest() source_hash = hashlib.sha256(repo_url.encode()).hexdigest()
repo_path = f"/tmp/{source_hash}" repo_path = f"/tmp/{source_hash}"

View File

@@ -1,11 +1,5 @@
import hashlib import hashlib
try:
from langchain.document_loaders import UnstructuredFileLoader
except ImportError:
raise ImportError(
'Unstructured file requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`'
) from None
from embedchain.helper.json_serializable import register_deserializable from embedchain.helper.json_serializable import register_deserializable
from embedchain.loaders.base_loader import BaseLoader from embedchain.loaders.base_loader import BaseLoader
from embedchain.utils import clean_string from embedchain.utils import clean_string
@@ -15,6 +9,13 @@ from embedchain.utils import clean_string
class UnstructuredLoader(BaseLoader): class UnstructuredLoader(BaseLoader):
def load_data(self, url): def load_data(self, url):
"""Load data from a Unstructured file.""" """Load data from a Unstructured file."""
try:
from langchain.document_loaders import UnstructuredFileLoader
except ImportError:
raise ImportError(
'Unstructured file requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`' # noqa: E501
) from None
loader = UnstructuredFileLoader(url) loader = UnstructuredFileLoader(url)
data = [] data = []
all_content = [] all_content = []

32
poetry.lock generated
View File

@@ -6892,30 +6892,30 @@ backoff = "*"
beautifulsoup4 = "*" beautifulsoup4 = "*"
chardet = "*" chardet = "*"
dataclasses-json = "*" dataclasses-json = "*"
ebooklib = {version = "*", optional = true, markers = "extra == \"local-inference\""} ebooklib = {version = "*", optional = true, markers = "extra == \"all-docs\" or extra == \"local-inference\""}
emoji = "*" emoji = "*"
filetype = "*" filetype = "*"
langdetect = "*" langdetect = "*"
lxml = "*" lxml = "*"
markdown = {version = "*", optional = true, markers = "extra == \"local-inference\""} markdown = {version = "*", optional = true, markers = "extra == \"all-docs\" or extra == \"local-inference\""}
msg-parser = {version = "*", optional = true, markers = "extra == \"local-inference\""} msg-parser = {version = "*", optional = true, markers = "extra == \"all-docs\" or extra == \"local-inference\""}
nltk = "*" nltk = "*"
numpy = "*" numpy = "*"
openpyxl = {version = "*", optional = true, markers = "extra == \"local-inference\""} openpyxl = {version = "*", optional = true, markers = "extra == \"all-docs\" or extra == \"local-inference\""}
pandas = {version = "*", optional = true, markers = "extra == \"local-inference\""} pandas = {version = "*", optional = true, markers = "extra == \"all-docs\" or extra == \"local-inference\""}
pdf2image = {version = "*", optional = true, markers = "extra == \"local-inference\""} pdf2image = {version = "*", optional = true, markers = "extra == \"all-docs\" or extra == \"local-inference\""}
"pdfminer.six" = {version = "*", optional = true, markers = "extra == \"local-inference\""} "pdfminer.six" = {version = "*", optional = true, markers = "extra == \"all-docs\" or extra == \"local-inference\""}
pypandoc = {version = "*", optional = true, markers = "extra == \"local-inference\""} pypandoc = {version = "*", optional = true, markers = "extra == \"all-docs\" or extra == \"local-inference\""}
python-docx = {version = ">=1.0.1", optional = true, markers = "extra == \"local-inference\""} python-docx = {version = ">=1.0.1", optional = true, markers = "extra == \"all-docs\" or extra == \"local-inference\""}
python-iso639 = "*" python-iso639 = "*"
python-magic = "*" python-magic = "*"
python-pptx = {version = "<=0.6.21", optional = true, markers = "extra == \"local-inference\""} python-pptx = {version = "<=0.6.21", optional = true, markers = "extra == \"all-docs\" or extra == \"local-inference\""}
rapidfuzz = "*" rapidfuzz = "*"
requests = "*" requests = "*"
tabulate = "*" tabulate = "*"
unstructured-inference = {version = "0.7.3", optional = true, markers = "extra == \"local-inference\""} unstructured-inference = {version = "0.7.3", optional = true, markers = "extra == \"all-docs\" or extra == \"local-inference\""}
"unstructured.pytesseract" = {version = ">=0.3.12", optional = true, markers = "extra == \"local-inference\""} "unstructured.pytesseract" = {version = ">=0.3.12", optional = true, markers = "extra == \"all-docs\" or extra == \"local-inference\""}
xlrd = {version = "*", optional = true, markers = "extra == \"local-inference\""} xlrd = {version = "*", optional = true, markers = "extra == \"all-docs\" or extra == \"local-inference\""}
[package.extras] [package.extras]
airtable = ["pyairtable"] airtable = ["pyairtable"]
@@ -7641,7 +7641,7 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
[extras] [extras]
cohere = ["cohere"] cohere = ["cohere"]
community = ["llama-hub"] community = ["llama-hub"]
dataloaders = ["beautifulsoup4", "docx2txt", "duckduckgo-search", "pypdf", "pytube", "sentence-transformers", "unstructured"] dataloaders = ["beautifulsoup4", "docx2txt", "duckduckgo-search", "pypdf", "pytube", "sentence-transformers", "unstructured", "youtube-transcript-api"]
discord = ["discord"] discord = ["discord"]
elasticsearch = ["elasticsearch"] elasticsearch = ["elasticsearch"]
git = ["gitpython"] git = ["gitpython"]
@@ -7663,9 +7663,9 @@ streamlit = []
vertexai = ["google-cloud-aiplatform"] vertexai = ["google-cloud-aiplatform"]
weaviate = ["weaviate-client"] weaviate = ["weaviate-client"]
whatsapp = ["flask", "twilio"] whatsapp = ["flask", "twilio"]
youtube-channel = ["yt_dlp"] youtube = ["youtube-transcript-api", "yt_dlp"]
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = ">=3.9,<3.12" python-versions = ">=3.9,<3.12"
content-hash = "a7282080c7a4379bdc6f33dfe9cae7eb20764aae0176137ba5c7af7cdcc58ede" content-hash = "58dce9bc5ef9c8d7e77d5fbc3176e24a8facbe89beddbf4c605b9c68e6617b5a"

View File

@@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "embedchain" name = "embedchain"
version = "0.1.15" version = "0.1.16"
description = "Data platform for LLMs - Load, index, retrieve and sync any unstructured data" description = "Data platform for LLMs - Load, index, retrieve and sync any unstructured data"
authors = [ authors = [
"Taranjeet Singh <taranjeet@embedchain.ai>", "Taranjeet Singh <taranjeet@embedchain.ai>",
@@ -120,7 +120,7 @@ weaviate-client = { version = "^3.24.1", optional = true }
docx2txt = { version = "^0.8", optional = true } docx2txt = { version = "^0.8", optional = true }
pinecone-client = { version = "^2.2.4", optional = true } pinecone-client = { version = "^2.2.4", optional = true }
qdrant-client = { version = "1.6.3", optional = true } qdrant-client = { version = "1.6.3", optional = true }
unstructured = {extras = ["local-inference"], version = "^0.10.18", optional = true} unstructured = {extras = ["local-inference", "all-docs"], version = "^0.10.18", optional = true}
pillow = { version = "10.0.1", optional = true } pillow = { version = "10.0.1", optional = true }
torchvision = { version = ">=0.15.1, !=0.15.2", optional = true } torchvision = { version = ">=0.15.1, !=0.15.2", optional = true }
ftfy = { version = "6.1.1", optional = true } ftfy = { version = "6.1.1", optional = true }
@@ -169,7 +169,7 @@ huggingface_hub=["huggingface_hub"]
cohere = ["cohere"] cohere = ["cohere"]
milvus = ["pymilvus"] milvus = ["pymilvus"]
dataloaders=[ dataloaders=[
"youtube-transcripts-api", "youtube-transcript-api",
"beautifulsoup4", "beautifulsoup4",
"docx2txt", "docx2txt",
"duckduckgo-search", "duckduckgo-search",
@@ -193,9 +193,9 @@ json = ["llama-hub"]
postgres = ["psycopg", "psycopg-binary", "psycopg-pool"] postgres = ["psycopg", "psycopg-binary", "psycopg-pool"]
mysql = ["mysql-connector-python"] mysql = ["mysql-connector-python"]
git = ["gitpython"] git = ["gitpython"]
youtube_channel = [ youtube = [
"yt_dlp", "yt_dlp",
"youtube-transcripts-api", "youtube-transcript-api",
] ]
[tool.poetry.group.docs.dependencies] [tool.poetry.group.docs.dependencies]