[Features] Add Github and Youtube Channel loaders (#957)
Co-authored-by: Deven Patel <deven298@yahoo.com> Co-authored-by: Deshraj Yadav <deshrajdry@gmail.com>
This commit is contained in:
@@ -1,5 +1,8 @@
|
||||
import hashlib
|
||||
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
|
||||
from embedchain.config.add_config import ChunkerConfig
|
||||
from embedchain.helper.json_serializable import JSONSerializable
|
||||
from embedchain.models.data_type import DataType
|
||||
|
||||
@@ -7,7 +10,15 @@ from embedchain.models.data_type import DataType
|
||||
class BaseChunker(JSONSerializable):
|
||||
def __init__(self, text_splitter):
    """Set up the chunker with the given text splitter.

    When ``text_splitter`` is ``None`` a ``RecursiveCharacterTextSplitter``
    is built from a default ``ChunkerConfig`` (chunk size 1000, no overlap,
    length measured with ``len``). ``data_type`` starts out as ``None``.
    """
    if text_splitter is None:
        # No splitter supplied: build the default one from a default config.
        default_config = ChunkerConfig(chunk_size=1000, chunk_overlap=0, length_function=len)
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=default_config.chunk_size,
            chunk_overlap=default_config.chunk_overlap,
            length_function=default_config.length_function,
        )
    self.text_splitter = text_splitter
    self.data_type = None
|
||||
|
||||
def create_chunks(self, loader, src, app_id=None):
|
||||
|
||||
@@ -64,6 +64,8 @@ class DataFormatter(JSONSerializable):
|
||||
DataType.GMAIL: "embedchain.loaders.gmail.GmailLoader",
|
||||
DataType.NOTION: "embedchain.loaders.notion.NotionLoader",
|
||||
DataType.SUBSTACK: "embedchain.loaders.substack.SubstackLoader",
|
||||
DataType.GITHUB: "embedchain.loaders.github.GithubLoader",
|
||||
DataType.YOUTUBE_CHANNEL: "embedchain.loaders.youtube_channel.YoutubeChannelLoader",
|
||||
}
|
||||
|
||||
custom_loaders = set(
|
||||
@@ -114,6 +116,8 @@ class DataFormatter(JSONSerializable):
|
||||
DataType.SLACK: "embedchain.chunkers.slack.SlackChunker",
|
||||
DataType.DISCOURSE: "embedchain.chunkers.discourse.DiscourseChunker",
|
||||
DataType.SUBSTACK: "embedchain.chunkers.substack.SubstackChunker",
|
||||
DataType.GITHUB: "embedchain.chunkers.base_chunker.BaseChunker",
|
||||
DataType.YOUTUBE_CHANNEL: "embedchain.chunkers.base_chunker.BaseChunker",
|
||||
}
|
||||
|
||||
if data_type in chunker_classes:
|
||||
|
||||
81
embedchain/loaders/github.py
Normal file
81
embedchain/loaders/github.py
Normal file
@@ -0,0 +1,81 @@
|
||||
import concurrent.futures
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
|
||||
from embedchain.loaders.base_loader import BaseLoader
|
||||
from embedchain.loaders.json import JSONLoader
|
||||
from embedchain.loaders.mdx import MdxLoader
|
||||
from embedchain.loaders.unstructured_file import UnstructuredLoader
|
||||
from embedchain.utils import detect_datatype
|
||||
|
||||
|
||||
class GithubLoader(BaseLoader):
    """Loader that clones a git repository and loads the files in its working tree."""

    def load_data(self, repo_url):
        """Load data from a git repo.

        The repository is cloned into ``/tmp/<sha256 of repo_url>`` (or fetched
        from ``origin`` when that directory already exists). Every file found
        under that directory, except git's own ``.git`` directory, is loaded
        with the MDX, JSON or unstructured loader depending on its detected
        data type.

        Args:
            repo_url: URL of the repository; passed to ``Repo.clone_from``.

        Returns:
            A dict with ``doc_id`` (SHA-256 hex digest of ``repo_url`` followed
            by the sorted, comma-joined URLs of the loaded items) and ``data``
            (the list of items returned by the per-file loaders).

        Raises:
            ValueError: if GitPython cannot be imported.
        """
        try:
            from git import Repo
        except ImportError as e:
            raise ValueError(
                "GithubLoader requires extra dependencies. Install with `pip install --upgrade 'embedchain[git]'`"
            ) from e

        mdx_loader = MdxLoader()
        json_loader = JSONLoader()
        unstructured_loader = UnstructuredLoader()
        data = []
        data_urls = []

        def _fetch_or_clone_repo(repo_url: str, local_path: str):
            """Clone ``repo_url`` into ``local_path``, or fetch from ``origin`` if the path exists."""
            if os.path.exists(local_path):
                logging.info("Repository already exists. Fetching updates...")
                repo = Repo(local_path)
                # NOTE(review): a fetch only updates remote-tracking refs; the
                # files read below stay at the previously checked-out commit.
                # Confirm whether a pull/checkout is intended here.
                repo.remotes.origin.fetch()
                logging.info("Fetch completed.")
            else:
                logging.info("Cloning repository...")
                Repo.clone_from(repo_url, local_path)
                logging.info("Clone completed.")

        def _load_file(file_path: str):
            """Load one file with the loader matching its detected data type."""
            try:
                data_type = detect_datatype(file_path).value
            except Exception:
                # Any detection failure falls back to the generic loader.
                data_type = "unstructured"

            if data_type == "mdx":
                loaded = mdx_loader.load_data(file_path)
            elif data_type == "json":
                loaded = json_loader.load_data(file_path)
            else:
                loaded = unstructured_loader.load_data(file_path)

            return loaded.get("data", [])

        def _iter_repo_files(repo_path: str):
            """Yield the path of every file under ``repo_path``, skipping ``.git`` directories."""
            for root, dirs, files in os.walk(repo_path):
                # Prune in place so os.walk never descends into git's internal
                # object store; those files are not repository content.
                if ".git" in dirs:
                    dirs.remove(".git")
                for filename in files:
                    yield os.path.join(root, filename)

        def _add_repo_files(repo_path: str):
            """Load all repository files concurrently and collect their items and URLs."""
            with concurrent.futures.ThreadPoolExecutor() as executor:
                future_to_file = {
                    executor.submit(_load_file, file_path): file_path for file_path in _iter_repo_files(repo_path)
                }
                for future in concurrent.futures.as_completed(future_to_file):
                    file = future_to_file[future]
                    try:
                        results = future.result()
                        if results:
                            data.extend(results)
                            data_urls.extend([result.get("meta_data").get("url") for result in results])
                    except Exception as e:
                        # Best effort: one unreadable file must not abort the whole repo.
                        logging.error(f"Failed to process {file}: {e}")

        source_hash = hashlib.sha256(repo_url.encode()).hexdigest()
        repo_path = f"/tmp/{source_hash}"
        _fetch_or_clone_repo(repo_url=repo_url, local_path=repo_path)
        _add_repo_files(repo_path)
        # Results arrive in thread-completion order, which varies between runs;
        # sort the URLs so the same repository content always hashes to the same id.
        doc_id = hashlib.sha256((repo_url + ", ".join(sorted(data_urls))).encode()).hexdigest()
        return {
            "doc_id": doc_id,
            "data": data,
        }
|
||||
@@ -57,8 +57,8 @@ class SitemapLoader(BaseLoader):
|
||||
try:
|
||||
data = future.result()
|
||||
if data:
|
||||
output.append(data)
|
||||
output.extend(data)
|
||||
except Exception as e:
|
||||
logging.error(f"Error loading page {link}: {e}")
|
||||
|
||||
return {"doc_id": doc_id, "data": [data[0] for data in output if data]}
|
||||
return {"doc_id": doc_id, "data": output}
|
||||
|
||||
70
embedchain/loaders/youtube_channel.py
Normal file
70
embedchain/loaders/youtube_channel.py
Normal file
@@ -0,0 +1,70 @@
|
||||
import concurrent.futures
|
||||
import hashlib
|
||||
import logging
|
||||
|
||||
from embedchain.loaders.base_loader import BaseLoader
|
||||
from embedchain.loaders.youtube_video import YoutubeVideoLoader
|
||||
|
||||
|
||||
class YoutubeChannelLoader(BaseLoader):
    """Loader for youtube channel."""

    def load_data(self, channel_name):
        """Load every video listed on a channel's ``/videos`` page.

        The video links are listed with ``yt_dlp`` (flat extraction, nothing
        is downloaded) and each link is loaded with ``YoutubeVideoLoader`` in
        a thread pool. Videos that fail to load are logged and skipped.

        Args:
            channel_name: path segment inserted into
                ``https://www.youtube.com/{channel_name}/videos``.

        Returns:
            A dict with ``doc_id`` (SHA-256 hex digest of the channel URL
            followed by the sorted, comma-joined URLs of the loaded items) and
            ``data`` (the list of items returned by the video loader).

        Raises:
            ValueError: if ``yt_dlp`` cannot be imported.
        """
        try:
            import yt_dlp
        except ImportError as e:
            raise ValueError(
                "YoutubeLoader requires extra dependencies. Install with `pip install --upgrade 'embedchain[youtube_channel]'`"  # noqa: E501
            ) from e

        data = []
        data_urls = []
        youtube_url = f"https://www.youtube.com/{channel_name}/videos"
        youtube_video_loader = YoutubeVideoLoader()

        def _get_yt_video_links():
            """Return the video URLs listed for the channel, or ``[]`` on any failure."""
            ydl_opts = {
                "quiet": True,
                "extract_flat": True,
            }
            videos = None
            try:
                with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                    info_dict = ydl.extract_info(youtube_url, download=False)
                    if "entries" in info_dict:
                        videos = [entry["url"] for entry in info_dict["entries"]]
            except Exception:
                videos = None

            if videos is None:
                # Covers both an extraction error and a response without
                # "entries"; either way there is nothing to load.
                logging.error(f"Failed to fetch youtube videos for channel: {channel_name}")
                return []
            return videos

        def _load_yt_video(video_link):
            """Load one video; return its items, or ``None`` if loading fails or yields nothing."""
            try:
                each_load_data = youtube_video_loader.load_data(video_link)
                if each_load_data:
                    return each_load_data.get("data")
            except Exception as e:
                logging.error(f"Failed to load youtube video {video_link}: {e}")
            return None

        def _add_youtube_channel():
            """Load all channel videos concurrently and collect their items and URLs."""
            video_links = _get_yt_video_links()
            with concurrent.futures.ThreadPoolExecutor() as executor:
                future_to_video = {
                    executor.submit(_load_yt_video, video_link): video_link for video_link in video_links
                }  # noqa: E501
                for future in concurrent.futures.as_completed(future_to_video):
                    video = future_to_video[future]
                    try:
                        results = future.result()
                        if results:
                            data.extend(results)
                            data_urls.extend([result.get("meta_data").get("url") for result in results])
                    except Exception as e:
                        logging.error(f"Failed to process youtube video {video}: {e}")

        _add_youtube_channel()
        # Results arrive in thread-completion order, which varies between runs;
        # sort the URLs so the same set of videos always hashes to the same id.
        doc_id = hashlib.sha256((youtube_url + ", ".join(sorted(data_urls))).encode()).hexdigest()
        return {
            "doc_id": doc_id,
            "data": data,
        }
|
||||
@@ -34,6 +34,8 @@ class IndirectDataType(Enum):
|
||||
SLACK = "slack"
|
||||
DISCOURSE = "discourse"
|
||||
SUBSTACK = "substack"
|
||||
GITHUB = "github"
|
||||
YOUTUBE_CHANNEL = "youtube_channel"
|
||||
|
||||
|
||||
class SpecialDataType(Enum):
|
||||
@@ -67,3 +69,5 @@ class DataType(Enum):
|
||||
SLACK = IndirectDataType.SLACK.value
|
||||
DISCOURSE = IndirectDataType.DISCOURSE.value
|
||||
SUBSTACK = IndirectDataType.SUBSTACK.value
|
||||
GITHUB = IndirectDataType.GITHUB.value
|
||||
YOUTUBE_CHANNEL = IndirectDataType.YOUTUBE_CHANNEL.value
|
||||
|
||||
@@ -255,6 +255,10 @@ def detect_datatype(source: Any) -> DataType:
|
||||
logging.debug(f"Source of `{formatted_source}` detected as `docs_site`.")
|
||||
return DataType.DOCS_SITE
|
||||
|
||||
if "github.com" in url.netloc:
|
||||
logging.debug(f"Source of `{formatted_source}` detected as `github`.")
|
||||
return DataType.GITHUB
|
||||
|
||||
# If none of the above conditions are met, it's a general web page
|
||||
logging.debug(f"Source of `{formatted_source}` detected as `web_page`.")
|
||||
return DataType.WEB_PAGE
|
||||
|
||||
119
poetry.lock
generated
119
poetry.lock
generated
@@ -1691,6 +1691,37 @@ files = [
|
||||
[package.dependencies]
|
||||
wcwidth = ">=0.2.5"
|
||||
|
||||
[[package]]
|
||||
name = "gitdb"
|
||||
version = "4.0.11"
|
||||
description = "Git Object Database"
|
||||
optional = true
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "gitdb-4.0.11-py3-none-any.whl", hash = "sha256:81a3407ddd2ee8df444cbacea00e2d038e40150acfa3001696fe0dcf1d3adfa4"},
|
||||
{file = "gitdb-4.0.11.tar.gz", hash = "sha256:bf5421126136d6d0af55bc1e7c1af1c397a34f5b7bd79e776cd3e89785c2b04b"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
smmap = ">=3.0.1,<6"
|
||||
|
||||
[[package]]
|
||||
name = "gitpython"
|
||||
version = "3.1.40"
|
||||
description = "GitPython is a Python library used to interact with Git repositories"
|
||||
optional = true
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "GitPython-3.1.40-py3-none-any.whl", hash = "sha256:cf14627d5a8049ffbf49915732e5eddbe8134c3bdb9d476e6182b676fc573f8a"},
|
||||
{file = "GitPython-3.1.40.tar.gz", hash = "sha256:22b126e9ffb671fdd0c129796343a02bf67bf2994b35449ffc9321aa755e18a4"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
gitdb = ">=4.0.1,<5"
|
||||
|
||||
[package.extras]
|
||||
test = ["black", "coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit", "pytest", "pytest-cov", "pytest-instafail", "pytest-subtests", "pytest-sugar"]
|
||||
|
||||
[[package]]
|
||||
name = "google-api-core"
|
||||
version = "2.12.0"
|
||||
@@ -3380,6 +3411,17 @@ files = [
|
||||
{file = "multidict-6.0.4.tar.gz", hash = "sha256:3666906492efb76453c0e7b97f2cf459b0682e7402c0489a95484965dbc1da49"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mutagen"
|
||||
version = "1.47.0"
|
||||
description = "read and write audio tags for many formats"
|
||||
optional = true
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "mutagen-1.47.0-py3-none-any.whl", hash = "sha256:edd96f50c5907a9539d8e5bba7245f62c9f520aef333d13392a79a4f70aca719"},
|
||||
{file = "mutagen-1.47.0.tar.gz", hash = "sha256:719fadef0a978c31b4cf3c956261b3c58b6948b32023078a2117b1de09f0fc99"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mypy-extensions"
|
||||
version = "1.0.0"
|
||||
@@ -4639,6 +4681,47 @@ files = [
|
||||
{file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pycryptodomex"
|
||||
version = "3.19.0"
|
||||
description = "Cryptographic library for Python"
|
||||
optional = true
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
|
||||
files = [
|
||||
{file = "pycryptodomex-3.19.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:ff64fd720def623bf64d8776f8d0deada1cc1bf1ec3c1f9d6f5bb5bd098d034f"},
|
||||
{file = "pycryptodomex-3.19.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:61056a1fd3254f6f863de94c233b30dd33bc02f8c935b2000269705f1eeeffa4"},
|
||||
{file = "pycryptodomex-3.19.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:258c4233a3fe5a6341780306a36c6fb072ef38ce676a6d41eec3e591347919e8"},
|
||||
{file = "pycryptodomex-3.19.0-cp27-cp27m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e45bb4635b3c4e0a00ca9df75ef6295838c85c2ac44ad882410cb631ed1eeaa"},
|
||||
{file = "pycryptodomex-3.19.0-cp27-cp27m-musllinux_1_1_aarch64.whl", hash = "sha256:a12144d785518f6491ad334c75ccdc6ad52ea49230b4237f319dbb7cef26f464"},
|
||||
{file = "pycryptodomex-3.19.0-cp27-cp27m-win32.whl", hash = "sha256:1789d89f61f70a4cd5483d4dfa8df7032efab1118f8b9894faae03c967707865"},
|
||||
{file = "pycryptodomex-3.19.0-cp27-cp27m-win_amd64.whl", hash = "sha256:eb2fc0ec241bf5e5ef56c8fbec4a2634d631e4c4f616a59b567947a0f35ad83c"},
|
||||
{file = "pycryptodomex-3.19.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:c9a68a2f7bd091ccea54ad3be3e9d65eded813e6d79fdf4cc3604e26cdd6384f"},
|
||||
{file = "pycryptodomex-3.19.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:8df69e41f7e7015a90b94d1096ec3d8e0182e73449487306709ec27379fff761"},
|
||||
{file = "pycryptodomex-3.19.0-cp27-cp27mu-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:917033016ecc23c8933205585a0ab73e20020fdf671b7cd1be788a5c4039840b"},
|
||||
{file = "pycryptodomex-3.19.0-cp27-cp27mu-musllinux_1_1_aarch64.whl", hash = "sha256:e8e5ecbd4da4157889fce8ba49da74764dd86c891410bfd6b24969fa46edda51"},
|
||||
{file = "pycryptodomex-3.19.0-cp35-abi3-macosx_10_9_universal2.whl", hash = "sha256:a77b79852175064c822b047fee7cf5a1f434f06ad075cc9986aa1c19a0c53eb0"},
|
||||
{file = "pycryptodomex-3.19.0-cp35-abi3-macosx_10_9_x86_64.whl", hash = "sha256:5b883e1439ab63af976656446fb4839d566bb096f15fc3c06b5a99cde4927188"},
|
||||
{file = "pycryptodomex-3.19.0-cp35-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3866d68e2fc345162b1b9b83ef80686acfe5cec0d134337f3b03950a0a8bf56"},
|
||||
{file = "pycryptodomex-3.19.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c74eb1f73f788facece7979ce91594dc177e1a9b5d5e3e64697dd58299e5cb4d"},
|
||||
{file = "pycryptodomex-3.19.0-cp35-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7cb51096a6a8d400724104db8a7e4f2206041a1f23e58924aa3d8d96bcb48338"},
|
||||
{file = "pycryptodomex-3.19.0-cp35-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a588a1cb7781da9d5e1c84affd98c32aff9c89771eac8eaa659d2760666f7139"},
|
||||
{file = "pycryptodomex-3.19.0-cp35-abi3-musllinux_1_1_i686.whl", hash = "sha256:d4dd3b381ff5a5907a3eb98f5f6d32c64d319a840278ceea1dcfcc65063856f3"},
|
||||
{file = "pycryptodomex-3.19.0-cp35-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:263de9a96d2fcbc9f5bd3a279f14ea0d5f072adb68ebd324987576ec25da084d"},
|
||||
{file = "pycryptodomex-3.19.0-cp35-abi3-win32.whl", hash = "sha256:67c8eb79ab33d0fbcb56842992298ddb56eb6505a72369c20f60bc1d2b6fb002"},
|
||||
{file = "pycryptodomex-3.19.0-cp35-abi3-win_amd64.whl", hash = "sha256:09c9401dc06fb3d94cb1ec23b4ea067a25d1f4c6b7b118ff5631d0b5daaab3cc"},
|
||||
{file = "pycryptodomex-3.19.0-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:edbe083c299835de7e02c8aa0885cb904a75087d35e7bab75ebe5ed336e8c3e2"},
|
||||
{file = "pycryptodomex-3.19.0-pp27-pypy_73-win32.whl", hash = "sha256:136b284e9246b4ccf4f752d435c80f2c44fc2321c198505de1d43a95a3453b3c"},
|
||||
{file = "pycryptodomex-3.19.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5d73e9fa3fe830e7b6b42afc49d8329b07a049a47d12e0ef9225f2fd220f19b2"},
|
||||
{file = "pycryptodomex-3.19.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b2f1982c5bc311f0aab8c293524b861b485d76f7c9ab2c3ac9a25b6f7655975"},
|
||||
{file = "pycryptodomex-3.19.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bfb040b5dda1dff1e197d2ef71927bd6b8bfcb9793bc4dfe0bb6df1e691eaacb"},
|
||||
{file = "pycryptodomex-3.19.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:800a2b05cfb83654df80266692f7092eeefe2a314fa7901dcefab255934faeec"},
|
||||
{file = "pycryptodomex-3.19.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:c01678aee8ac0c1a461cbc38ad496f953f9efcb1fa19f5637cbeba7544792a53"},
|
||||
{file = "pycryptodomex-3.19.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2126bc54beccbede6eade00e647106b4f4c21e5201d2b0a73e9e816a01c50905"},
|
||||
{file = "pycryptodomex-3.19.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b801216c48c0886742abf286a9a6b117e248ca144d8ceec1f931ce2dd0c9cb40"},
|
||||
{file = "pycryptodomex-3.19.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:50cb18d4dd87571006fd2447ccec85e6cec0136632a550aa29226ba075c80644"},
|
||||
{file = "pycryptodomex-3.19.0.tar.gz", hash = "sha256:af83a554b3f077564229865c45af0791be008ac6469ef0098152139e6bd4b5b6"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pydantic"
|
||||
version = "2.4.2"
|
||||
@@ -6014,6 +6097,17 @@ files = [
|
||||
optional = ["SQLAlchemy (>=1.4,<3)", "aiodns (>1.0)", "aiohttp (>=3.7.3,<4)", "boto3 (<=2)", "websocket-client (>=1,<2)", "websockets (>=10,<11)"]
|
||||
testing = ["Flask (>=1,<2)", "Flask-Sockets (>=0.2,<1)", "Jinja2 (==3.0.3)", "Werkzeug (<2)", "black (==22.8.0)", "boto3 (<=2)", "click (==8.0.4)", "databases (>=0.5)", "flake8 (>=5,<6)", "itsdangerous (==1.1.0)", "moto (>=3,<4)", "psutil (>=5,<6)", "pytest (>=6.2.5,<7)", "pytest-asyncio (<1)", "pytest-cov (>=2,<3)"]
|
||||
|
||||
[[package]]
|
||||
name = "smmap"
|
||||
version = "5.0.1"
|
||||
description = "A pure Python implementation of a sliding window memory map manager"
|
||||
optional = true
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "smmap-5.0.1-py3-none-any.whl", hash = "sha256:e6d8668fa5f93e706934a62d7b4db19c8d9eb8cf2adbb75ef1b675aa332b69da"},
|
||||
{file = "smmap-5.0.1.tar.gz", hash = "sha256:dceeb6c0028fdb6734471eb07c0cd2aae706ccaecab45965ee83f11c8d3b1f62"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sniffio"
|
||||
version = "1.3.0"
|
||||
@@ -7508,6 +7602,27 @@ files = [
|
||||
[package.dependencies]
|
||||
requests = "*"
|
||||
|
||||
[[package]]
|
||||
name = "yt-dlp"
|
||||
version = "2023.11.16"
|
||||
description = "A youtube-dl fork with additional features and patches"
|
||||
optional = true
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "yt-dlp-2023.11.16.tar.gz", hash = "sha256:f0ccdaf12e08b15902601a4671c7ab12906d7b11de3ae75fa6506811c24ec5da"},
|
||||
{file = "yt_dlp-2023.11.16-py2.py3-none-any.whl", hash = "sha256:0322ba85aa4afdb75f8641ed550e5958964daff034aeb477abb15031fd9a51ed"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
brotli = {version = "*", markers = "implementation_name == \"cpython\""}
|
||||
brotlicffi = {version = "*", markers = "implementation_name != \"cpython\""}
|
||||
certifi = "*"
|
||||
mutagen = "*"
|
||||
pycryptodomex = "*"
|
||||
requests = ">=2.31.0,<3"
|
||||
urllib3 = ">=1.26.17,<3"
|
||||
websockets = "*"
|
||||
|
||||
[[package]]
|
||||
name = "zipp"
|
||||
version = "3.17.0"
|
||||
@@ -7529,6 +7644,7 @@ community = ["llama-hub"]
|
||||
dataloaders = ["beautifulsoup4", "docx2txt", "duckduckgo-search", "pypdf", "pytube", "sentence-transformers", "unstructured"]
|
||||
discord = ["discord"]
|
||||
elasticsearch = ["elasticsearch"]
|
||||
git = ["gitpython"]
|
||||
gmail = ["llama-hub", "requests"]
|
||||
huggingface-hub = ["huggingface_hub"]
|
||||
images = ["ftfy", "pillow", "regex", "torch", "torchvision"]
|
||||
@@ -7547,8 +7663,9 @@ streamlit = []
|
||||
vertexai = ["google-cloud-aiplatform"]
|
||||
weaviate = ["weaviate-client"]
|
||||
whatsapp = ["flask", "twilio"]
|
||||
youtube-channel = ["yt_dlp"]
|
||||
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = ">=3.9,<3.12"
|
||||
content-hash = "fe9ebe5f637303885981d10ace60b955635c7ca7586605546837e59206bfefd7"
|
||||
content-hash = "a7282080c7a4379bdc6f33dfe9cae7eb20764aae0176137ba5c7af7cdcc58ede"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "embedchain"
|
||||
version = "0.1.13"
|
||||
version = "0.1.14"
|
||||
description = "Data platform for LLMs - Load, index, retrieve and sync any unstructured data"
|
||||
authors = [
|
||||
"Taranjeet Singh <taranjeet@embedchain.ai>",
|
||||
@@ -134,6 +134,8 @@ psycopg = { version = "^3.1.12", optional = true }
|
||||
psycopg-binary = { version = "^3.1.12", optional = true }
|
||||
psycopg-pool = { version = "^3.1.8", optional = true }
|
||||
mysql-connector-python = { version = "^8.1.0", optional = true }
|
||||
gitpython = { version = "^3.1.38", optional = true }
|
||||
yt_dlp = { version = "^2023.11.14", optional = true }
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
black = "^23.3.0"
|
||||
@@ -190,6 +192,11 @@ gmail = [
|
||||
json = ["llama-hub"]
|
||||
postgres = ["psycopg", "psycopg-binary", "psycopg-pool"]
|
||||
mysql = ["mysql-connector-python"]
|
||||
git = ["gitpython"]
|
||||
youtube_channel = [
|
||||
"yt_dlp",
|
||||
"youtube-transcript-api",
|
||||
]
|
||||
|
||||
[tool.poetry.group.docs.dependencies]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user