def __init__(self, text_splitter=None):
    """Initialize the chunker.

    Args:
        text_splitter: Object stored as ``self.text_splitter``. When it is
            ``None`` (which is now also the default, so callers may omit it),
            a ``RecursiveCharacterTextSplitter`` is built from a
            ``ChunkerConfig`` with ``chunk_size=1000``, ``chunk_overlap=0``
            and ``len`` as the length function.
    """
    if text_splitter is None:
        # Route the defaults through ChunkerConfig so the fallback splitter is
        # configured from the same attributes a caller-supplied config exposes.
        config = ChunkerConfig(chunk_size=1000, chunk_overlap=0, length_function=len)
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=config.chunk_size,
            chunk_overlap=config.chunk_overlap,
            length_function=config.length_function,
        )
    self.text_splitter = text_splitter
    # Not set here. NOTE(review): presumably assigned later by the caller or a
    # subclass before chunks are created -- confirm.
    self.data_type = None
"embedchain.chunkers.base_chunker.BaseChunker", + DataType.YOUTUBE_CHANNEL: "embedchain.chunkers.base_chunker.BaseChunker", } if data_type in chunker_classes: diff --git a/embedchain/loaders/github.py b/embedchain/loaders/github.py new file mode 100644 index 00000000..6b48f0d1 --- /dev/null +++ b/embedchain/loaders/github.py @@ -0,0 +1,81 @@ +import concurrent.futures +import hashlib +import logging +import os + +from embedchain.loaders.base_loader import BaseLoader +from embedchain.loaders.json import JSONLoader +from embedchain.loaders.mdx import MdxLoader +from embedchain.loaders.unstructured_file import UnstructuredLoader +from embedchain.utils import detect_datatype + + +class GithubLoader(BaseLoader): + def load_data(self, repo_url): + """Load data from a git repo.""" + try: + from git import Repo + except ImportError as e: + raise ValueError( + "GithubLoader requires extra dependencies. Install with `pip install --upgrade 'embedchain[git]'`" + ) from e + + mdx_loader = MdxLoader() + json_loader = JSONLoader() + unstructured_loader = UnstructuredLoader() + data = [] + data_urls = [] + + def _fetch_or_clone_repo(repo_url: str, local_path: str): + if os.path.exists(local_path): + logging.info("Repository already exists. 
Fetching updates...") + repo = Repo(local_path) + origin = repo.remotes.origin + origin.fetch() + logging.info("Fetch completed.") + else: + logging.info("Cloning repository...") + Repo.clone_from(repo_url, local_path) + logging.info("Clone completed.") + + def _load_file(file_path: str): + try: + data_type = detect_datatype(file_path).value + except Exception: + data_type = "unstructured" + + if data_type == "mdx": + data = mdx_loader.load_data(file_path) + elif data_type == "json": + data = json_loader.load_data(file_path) + else: + data = unstructured_loader.load_data(file_path) + + return data.get("data", []) + + def _add_repo_files(repo_path: str): + with concurrent.futures.ThreadPoolExecutor() as executor: + future_to_file = { + executor.submit(_load_file, os.path.join(root, filename)): os.path.join(root, filename) + for root, _, files in os.walk(repo_path) + for filename in files + } # noqa: E501 + for future in concurrent.futures.as_completed(future_to_file): + file = future_to_file[future] + try: + results = future.result() + if results: + data.extend(results) + data_urls.extend([result.get("meta_data").get("url") for result in results]) + except Exception as e: + logging.error(f"Failed to process {file}: {e}") + + source_hash = hashlib.sha256(repo_url.encode()).hexdigest() + repo_path = f"/tmp/{source_hash}" + _fetch_or_clone_repo(repo_url=repo_url, local_path=repo_path) + _add_repo_files(repo_path) + doc_id = hashlib.sha256((repo_url + ", ".join(data_urls)).encode()).hexdigest() + return { + "doc_id": doc_id, + "data": data, + } diff --git a/embedchain/loaders/sitemap.py b/embedchain/loaders/sitemap.py index 29638b0c..fa8bbe50 100644 --- a/embedchain/loaders/sitemap.py +++ b/embedchain/loaders/sitemap.py @@ -57,8 +57,8 @@ class SitemapLoader(BaseLoader): try: data = future.result() if data: - output.append(data) + output.extend(data) except Exception as e: logging.error(f"Error loading page {link}: {e}") - return {"doc_id": doc_id, "data": [data[0] for 
class YoutubeChannelLoader(BaseLoader):
    """Loader for youtube channel."""

    def load_data(self, channel_name):
        """Load every video listed on a channel's ``/videos`` page.

        The video links are read with ``yt_dlp`` (flat extraction, nothing is
        downloaded) and each one is loaded on a thread pool through
        ``YoutubeVideoLoader``. A failure to list the channel or to load a
        single video is logged and skipped rather than raised.

        Args:
            channel_name: Path segment inserted into
                ``https://www.youtube.com/{channel_name}/videos``.

        Returns:
            A dict with ``"doc_id"`` (SHA-256 of the channel videos URL plus
            the sorted URLs of the loaded documents) and ``"data"`` (the
            concatenated ``"data"`` lists of the per-video loads).

        Raises:
            ValueError: If ``yt_dlp`` is not installed.
        """
        try:
            import yt_dlp
        except ImportError as e:
            raise ValueError(
                "YoutubeLoader requires extra dependencies. Install with `pip install --upgrade 'embedchain[youtube_channel]'`"  # noqa: E501
            ) from e

        data = []
        data_urls = []
        youtube_url = f"https://www.youtube.com/{channel_name}/videos"
        youtube_video_loader = YoutubeVideoLoader()

        def _get_yt_video_links():
            """Return the list of video URLs for the channel; always a list, empty on any failure."""
            try:
                ydl_opts = {
                    "quiet": True,
                    "extract_flat": True,
                }
                with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                    info_dict = ydl.extract_info(youtube_url, download=False)
                # A missing/empty "entries" must yield [] (not None): the caller iterates the result.
                entries = (info_dict or {}).get("entries") or []
                # Skip empty entries and ones without a URL instead of discarding the whole listing.
                return [entry["url"] for entry in entries if entry and entry.get("url")]
            except Exception:
                # logging.exception keeps the message and attaches the traceback that was previously lost.
                logging.exception(f"Failed to fetch youtube videos for channel: {channel_name}")
                return []

        def _load_yt_video(video_link):
            """Load one video; return its ``"data"`` list, or None if nothing could be loaded."""
            try:
                each_load_data = youtube_video_loader.load_data(video_link)
                if each_load_data:
                    return each_load_data.get("data")
            except Exception as e:
                logging.error(f"Failed to load youtube video {video_link}: {e}")
            return None

        def _add_youtube_channel():
            """Load all channel videos concurrently, collecting results into ``data``/``data_urls``."""
            video_links = _get_yt_video_links()
            with concurrent.futures.ThreadPoolExecutor() as executor:
                future_to_video = {
                    executor.submit(_load_yt_video, video_link): video_link for video_link in video_links
                }
                for future in concurrent.futures.as_completed(future_to_video):
                    video = future_to_video[future]
                    try:
                        results = future.result()
                        if results:
                            # Collect URLs before touching the shared lists so a malformed result
                            # cannot leave `data` extended without its matching URLs.
                            urls = [(result.get("meta_data") or {}).get("url") for result in results]
                            data.extend(results)
                            data_urls.extend(url for url in urls if url)
                    except Exception as e:
                        logging.error(f"Failed to process youtube video {video}: {e}")

        _add_youtube_channel()
        # Futures complete in arbitrary order; sort so identical content always hashes to the same doc_id.
        doc_id = hashlib.sha256((youtube_url + ", ".join(sorted(data_urls))).encode()).hexdigest()
        return {
            "doc_id": doc_id,
            "data": data,
        }
"gitdb-4.0.11-py3-none-any.whl", hash = "sha256:81a3407ddd2ee8df444cbacea00e2d038e40150acfa3001696fe0dcf1d3adfa4"}, + {file = "gitdb-4.0.11.tar.gz", hash = "sha256:bf5421126136d6d0af55bc1e7c1af1c397a34f5b7bd79e776cd3e89785c2b04b"}, +] + +[package.dependencies] +smmap = ">=3.0.1,<6" + +[[package]] +name = "gitpython" +version = "3.1.40" +description = "GitPython is a Python library used to interact with Git repositories" +optional = true +python-versions = ">=3.7" +files = [ + {file = "GitPython-3.1.40-py3-none-any.whl", hash = "sha256:cf14627d5a8049ffbf49915732e5eddbe8134c3bdb9d476e6182b676fc573f8a"}, + {file = "GitPython-3.1.40.tar.gz", hash = "sha256:22b126e9ffb671fdd0c129796343a02bf67bf2994b35449ffc9321aa755e18a4"}, +] + +[package.dependencies] +gitdb = ">=4.0.1,<5" + +[package.extras] +test = ["black", "coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit", "pytest", "pytest-cov", "pytest-instafail", "pytest-subtests", "pytest-sugar"] + [[package]] name = "google-api-core" version = "2.12.0" @@ -3380,6 +3411,17 @@ files = [ {file = "multidict-6.0.4.tar.gz", hash = "sha256:3666906492efb76453c0e7b97f2cf459b0682e7402c0489a95484965dbc1da49"}, ] +[[package]] +name = "mutagen" +version = "1.47.0" +description = "read and write audio tags for many formats" +optional = true +python-versions = ">=3.7" +files = [ + {file = "mutagen-1.47.0-py3-none-any.whl", hash = "sha256:edd96f50c5907a9539d8e5bba7245f62c9f520aef333d13392a79a4f70aca719"}, + {file = "mutagen-1.47.0.tar.gz", hash = "sha256:719fadef0a978c31b4cf3c956261b3c58b6948b32023078a2117b1de09f0fc99"}, +] + [[package]] name = "mypy-extensions" version = "1.0.0" @@ -4639,6 +4681,47 @@ files = [ {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, ] +[[package]] +name = "pycryptodomex" +version = "3.19.0" +description = "Cryptographic library for Python" +optional = true +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 
+files = [ + {file = "pycryptodomex-3.19.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:ff64fd720def623bf64d8776f8d0deada1cc1bf1ec3c1f9d6f5bb5bd098d034f"}, + {file = "pycryptodomex-3.19.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:61056a1fd3254f6f863de94c233b30dd33bc02f8c935b2000269705f1eeeffa4"}, + {file = "pycryptodomex-3.19.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:258c4233a3fe5a6341780306a36c6fb072ef38ce676a6d41eec3e591347919e8"}, + {file = "pycryptodomex-3.19.0-cp27-cp27m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e45bb4635b3c4e0a00ca9df75ef6295838c85c2ac44ad882410cb631ed1eeaa"}, + {file = "pycryptodomex-3.19.0-cp27-cp27m-musllinux_1_1_aarch64.whl", hash = "sha256:a12144d785518f6491ad334c75ccdc6ad52ea49230b4237f319dbb7cef26f464"}, + {file = "pycryptodomex-3.19.0-cp27-cp27m-win32.whl", hash = "sha256:1789d89f61f70a4cd5483d4dfa8df7032efab1118f8b9894faae03c967707865"}, + {file = "pycryptodomex-3.19.0-cp27-cp27m-win_amd64.whl", hash = "sha256:eb2fc0ec241bf5e5ef56c8fbec4a2634d631e4c4f616a59b567947a0f35ad83c"}, + {file = "pycryptodomex-3.19.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:c9a68a2f7bd091ccea54ad3be3e9d65eded813e6d79fdf4cc3604e26cdd6384f"}, + {file = "pycryptodomex-3.19.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:8df69e41f7e7015a90b94d1096ec3d8e0182e73449487306709ec27379fff761"}, + {file = "pycryptodomex-3.19.0-cp27-cp27mu-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:917033016ecc23c8933205585a0ab73e20020fdf671b7cd1be788a5c4039840b"}, + {file = "pycryptodomex-3.19.0-cp27-cp27mu-musllinux_1_1_aarch64.whl", hash = "sha256:e8e5ecbd4da4157889fce8ba49da74764dd86c891410bfd6b24969fa46edda51"}, + {file = "pycryptodomex-3.19.0-cp35-abi3-macosx_10_9_universal2.whl", hash = "sha256:a77b79852175064c822b047fee7cf5a1f434f06ad075cc9986aa1c19a0c53eb0"}, + {file = "pycryptodomex-3.19.0-cp35-abi3-macosx_10_9_x86_64.whl", hash = 
"sha256:5b883e1439ab63af976656446fb4839d566bb096f15fc3c06b5a99cde4927188"}, + {file = "pycryptodomex-3.19.0-cp35-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3866d68e2fc345162b1b9b83ef80686acfe5cec0d134337f3b03950a0a8bf56"}, + {file = "pycryptodomex-3.19.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c74eb1f73f788facece7979ce91594dc177e1a9b5d5e3e64697dd58299e5cb4d"}, + {file = "pycryptodomex-3.19.0-cp35-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7cb51096a6a8d400724104db8a7e4f2206041a1f23e58924aa3d8d96bcb48338"}, + {file = "pycryptodomex-3.19.0-cp35-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a588a1cb7781da9d5e1c84affd98c32aff9c89771eac8eaa659d2760666f7139"}, + {file = "pycryptodomex-3.19.0-cp35-abi3-musllinux_1_1_i686.whl", hash = "sha256:d4dd3b381ff5a5907a3eb98f5f6d32c64d319a840278ceea1dcfcc65063856f3"}, + {file = "pycryptodomex-3.19.0-cp35-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:263de9a96d2fcbc9f5bd3a279f14ea0d5f072adb68ebd324987576ec25da084d"}, + {file = "pycryptodomex-3.19.0-cp35-abi3-win32.whl", hash = "sha256:67c8eb79ab33d0fbcb56842992298ddb56eb6505a72369c20f60bc1d2b6fb002"}, + {file = "pycryptodomex-3.19.0-cp35-abi3-win_amd64.whl", hash = "sha256:09c9401dc06fb3d94cb1ec23b4ea067a25d1f4c6b7b118ff5631d0b5daaab3cc"}, + {file = "pycryptodomex-3.19.0-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:edbe083c299835de7e02c8aa0885cb904a75087d35e7bab75ebe5ed336e8c3e2"}, + {file = "pycryptodomex-3.19.0-pp27-pypy_73-win32.whl", hash = "sha256:136b284e9246b4ccf4f752d435c80f2c44fc2321c198505de1d43a95a3453b3c"}, + {file = "pycryptodomex-3.19.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5d73e9fa3fe830e7b6b42afc49d8329b07a049a47d12e0ef9225f2fd220f19b2"}, + {file = "pycryptodomex-3.19.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:0b2f1982c5bc311f0aab8c293524b861b485d76f7c9ab2c3ac9a25b6f7655975"}, + {file = "pycryptodomex-3.19.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bfb040b5dda1dff1e197d2ef71927bd6b8bfcb9793bc4dfe0bb6df1e691eaacb"}, + {file = "pycryptodomex-3.19.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:800a2b05cfb83654df80266692f7092eeefe2a314fa7901dcefab255934faeec"}, + {file = "pycryptodomex-3.19.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:c01678aee8ac0c1a461cbc38ad496f953f9efcb1fa19f5637cbeba7544792a53"}, + {file = "pycryptodomex-3.19.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2126bc54beccbede6eade00e647106b4f4c21e5201d2b0a73e9e816a01c50905"}, + {file = "pycryptodomex-3.19.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b801216c48c0886742abf286a9a6b117e248ca144d8ceec1f931ce2dd0c9cb40"}, + {file = "pycryptodomex-3.19.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:50cb18d4dd87571006fd2447ccec85e6cec0136632a550aa29226ba075c80644"}, + {file = "pycryptodomex-3.19.0.tar.gz", hash = "sha256:af83a554b3f077564229865c45af0791be008ac6469ef0098152139e6bd4b5b6"}, +] + [[package]] name = "pydantic" version = "2.4.2" @@ -6014,6 +6097,17 @@ files = [ optional = ["SQLAlchemy (>=1.4,<3)", "aiodns (>1.0)", "aiohttp (>=3.7.3,<4)", "boto3 (<=2)", "websocket-client (>=1,<2)", "websockets (>=10,<11)"] testing = ["Flask (>=1,<2)", "Flask-Sockets (>=0.2,<1)", "Jinja2 (==3.0.3)", "Werkzeug (<2)", "black (==22.8.0)", "boto3 (<=2)", "click (==8.0.4)", "databases (>=0.5)", "flake8 (>=5,<6)", "itsdangerous (==1.1.0)", "moto (>=3,<4)", "psutil (>=5,<6)", "pytest (>=6.2.5,<7)", "pytest-asyncio (<1)", "pytest-cov (>=2,<3)"] +[[package]] +name = "smmap" +version = "5.0.1" +description = "A pure Python implementation of a sliding window memory map manager" +optional = true +python-versions = ">=3.7" +files = 
[ + {file = "smmap-5.0.1-py3-none-any.whl", hash = "sha256:e6d8668fa5f93e706934a62d7b4db19c8d9eb8cf2adbb75ef1b675aa332b69da"}, + {file = "smmap-5.0.1.tar.gz", hash = "sha256:dceeb6c0028fdb6734471eb07c0cd2aae706ccaecab45965ee83f11c8d3b1f62"}, +] + [[package]] name = "sniffio" version = "1.3.0" @@ -7508,6 +7602,27 @@ files = [ [package.dependencies] requests = "*" +[[package]] +name = "yt-dlp" +version = "2023.11.16" +description = "A youtube-dl fork with additional features and patches" +optional = true +python-versions = ">=3.7" +files = [ + {file = "yt-dlp-2023.11.16.tar.gz", hash = "sha256:f0ccdaf12e08b15902601a4671c7ab12906d7b11de3ae75fa6506811c24ec5da"}, + {file = "yt_dlp-2023.11.16-py2.py3-none-any.whl", hash = "sha256:0322ba85aa4afdb75f8641ed550e5958964daff034aeb477abb15031fd9a51ed"}, +] + +[package.dependencies] +brotli = {version = "*", markers = "implementation_name == \"cpython\""} +brotlicffi = {version = "*", markers = "implementation_name != \"cpython\""} +certifi = "*" +mutagen = "*" +pycryptodomex = "*" +requests = ">=2.31.0,<3" +urllib3 = ">=1.26.17,<3" +websockets = "*" + [[package]] name = "zipp" version = "3.17.0" @@ -7529,6 +7644,7 @@ community = ["llama-hub"] dataloaders = ["beautifulsoup4", "docx2txt", "duckduckgo-search", "pypdf", "pytube", "sentence-transformers", "unstructured"] discord = ["discord"] elasticsearch = ["elasticsearch"] +git = ["gitpython"] gmail = ["llama-hub", "requests"] huggingface-hub = ["huggingface_hub"] images = ["ftfy", "pillow", "regex", "torch", "torchvision"] @@ -7547,8 +7663,9 @@ streamlit = [] vertexai = ["google-cloud-aiplatform"] weaviate = ["weaviate-client"] whatsapp = ["flask", "twilio"] +youtube-channel = ["yt_dlp"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.12" -content-hash = "fe9ebe5f637303885981d10ace60b955635c7ca7586605546837e59206bfefd7" +content-hash = "a7282080c7a4379bdc6f33dfe9cae7eb20764aae0176137ba5c7af7cdcc58ede" diff --git a/pyproject.toml b/pyproject.toml index 
105033eb..f285ef44 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "embedchain" -version = "0.1.13" +version = "0.1.14" description = "Data platform for LLMs - Load, index, retrieve and sync any unstructured data" authors = [ "Taranjeet Singh ", @@ -134,6 +134,8 @@ psycopg = { version = "^3.1.12", optional = true } psycopg-binary = { version = "^3.1.12", optional = true } psycopg-pool = { version = "^3.1.8", optional = true } mysql-connector-python = { version = "^8.1.0", optional = true } +gitpython = { version = "^3.1.38", optional = true } +yt_dlp = { version = "^2023.11.14", optional = true } [tool.poetry.group.dev.dependencies] black = "^23.3.0" @@ -190,6 +192,11 @@ gmail = [ json = ["llama-hub"] postgres = ["psycopg", "psycopg-binary", "psycopg-pool"] mysql = ["mysql-connector-python"] +git = ["gitpython"] +youtube_channel = [ + "yt_dlp", + "youtube-transcripts-api", +] [tool.poetry.group.docs.dependencies]