From 36af1a76155a742a7849f2338c39ea964b336728 Mon Sep 17 00:00:00 2001 From: Deven Patel Date: Fri, 1 Dec 2023 11:24:13 -0800 Subject: [PATCH] [Improvement] improve github loader (#984) --- docs/data-sources/github.mdx | 50 +++ docs/data-sources/mysql.mdx | 1 - docs/data-sources/overview.mdx | 1 + embedchain/data_formatter/data_formatter.py | 2 +- embedchain/loaders/github.py | 349 +++++++++++++++----- poetry.lock | 139 +++++++- pyproject.toml | 3 +- tests/loaders/test_github.py | 33 ++ 8 files changed, 487 insertions(+), 91 deletions(-) create mode 100644 docs/data-sources/github.mdx create mode 100644 tests/loaders/test_github.py diff --git a/docs/data-sources/github.mdx b/docs/data-sources/github.mdx new file mode 100644 index 00000000..bbce3b01 --- /dev/null +++ b/docs/data-sources/github.mdx @@ -0,0 +1,50 @@ +--- +title: 📝 Github +--- + +1. Set up the GitHub loader by configuring it with your GitHub personal access token (PAT). Check out [this](https://docs.github.com/en/enterprise-server@3.6/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens#creating-a-personal-access-token) link to learn how to create a PAT. +```Python +from embedchain.loaders.github import GithubLoader + +loader = GithubLoader( + config={ + "token":"ghp_xxxx" + } + ) +``` + +2. Once you have set up the loader, you can create an app and load data using the above GitHub loader +```Python +import os +from embedchain.pipeline import Pipeline as App + +os.environ["OPENAI_API_KEY"] = "sk-xxxx" + +app = App() + +app.add("repo:embedchain/embedchain type:repo", data_type="github", loader=loader) + +response = app.query("What is Embedchain?") +# Answer: Embedchain is a Data Platform for Large Language Models (LLMs). It allows users to seamlessly load, index, retrieve, and sync unstructured data in order to build dynamic, LLM-powered applications. There is also a JavaScript implementation called embedchain-js available on GitHub. +``` +The `add` function of the app accepts any valid GitHub search query with qualifiers. It only supports loading GitHub code, repositories, issues, and pull requests. + +You must provide the qualifiers `type:` and `repo:` in the query. The `type:` qualifier can be a combination of `code`, `repo`, `pr`, `issue`. The `repo:` qualifier must be a valid GitHub repository name. + + + + - `repo:embedchain/embedchain type:repo` - to load the repository + - `repo:embedchain/embedchain type:issue,pr` - to load the issues and pull-requests of the repository + - `repo:embedchain/embedchain type:issue state:closed` - to load the closed issues of the repository + + +3. We automatically create a chunker to chunk your GitHub data. However, if you wish to provide your own chunker class, here is how you can do that: +```Python +from embedchain.chunkers.common_chunker import CommonChunker +from embedchain.config.add_config import ChunkerConfig + +github_chunker_config = ChunkerConfig(chunk_size=2000, chunk_overlap=0, length_function=len) +github_chunker = CommonChunker(config=github_chunker_config) + +app.add(load_query, data_type="github", loader=loader, chunker=github_chunker) +``` diff --git a/docs/data-sources/mysql.mdx b/docs/data-sources/mysql.mdx index 1386b6cb..e3ee9a48 100644 --- a/docs/data-sources/mysql.mdx +++ b/docs/data-sources/mysql.mdx @@ -21,7 +21,6 @@ For more details on how to setup with valid config, check MySQL [documentation]( 2.
Once you setup the loader, you can create an app and load data using the above MySQL loader ```Python -import os from embedchain.pipeline import Pipeline as App app = App() diff --git a/docs/data-sources/overview.mdx b/docs/data-sources/overview.mdx index 35c23903..b509e7d0 100644 --- a/docs/data-sources/overview.mdx +++ b/docs/data-sources/overview.mdx @@ -25,6 +25,7 @@ Embedchain comes with built-in support for various data sources. We handle the c +
diff --git a/embedchain/data_formatter/data_formatter.py b/embedchain/data_formatter/data_formatter.py index ce6d8307..73f929f7 100644 --- a/embedchain/data_formatter/data_formatter.py +++ b/embedchain/data_formatter/data_formatter.py @@ -64,7 +64,6 @@ class DataFormatter(JSONSerializable): DataType.GMAIL: "embedchain.loaders.gmail.GmailLoader", DataType.NOTION: "embedchain.loaders.notion.NotionLoader", DataType.SUBSTACK: "embedchain.loaders.substack.SubstackLoader", - DataType.GITHUB: "embedchain.loaders.github.GithubLoader", DataType.YOUTUBE_CHANNEL: "embedchain.loaders.youtube_channel.YoutubeChannelLoader", DataType.DISCORD: "embedchain.loaders.discord.DiscordLoader", } @@ -75,6 +74,7 @@ class DataFormatter(JSONSerializable): DataType.MYSQL, DataType.SLACK, DataType.DISCOURSE, + DataType.GITHUB, ] ) diff --git a/embedchain/loaders/github.py b/embedchain/loaders/github.py index b4039825..e19ab2cc 100644 --- a/embedchain/loaders/github.py +++ b/embedchain/loaders/github.py @@ -2,116 +2,297 @@ import concurrent.futures import hashlib import logging import os +import re +import shlex +from typing import Any, Dict, Optional from tqdm import tqdm from embedchain.loaders.base_loader import BaseLoader -from embedchain.loaders.json import JSONLoader -from embedchain.loaders.mdx import MdxLoader -from embedchain.utils import detect_datatype +from embedchain.utils import clean_string +GITHUB_URL = "https://github.com" +GITHUB_API_URL = "https://api.github.com" -def _load_file_data(path): - data = [] - data_content = [] - try: - with open(path, "rb") as f: - content = f.read().decode("utf-8") - except Exception as e: - print(f"Error reading file {path}: {e}") - raise ValueError(f"Failed to read file {path}") - - meta_data = {} - meta_data["url"] = path - data.append( - { - "content": content, - "meta_data": meta_data, - } - ) - data_content.append(content) - doc_id = hashlib.sha256((" ".join(data_content) + path).encode()).hexdigest() - return { - "doc_id": doc_id, - "data": data, - } +VALID_SEARCH_TYPES = set(["code", "repo", "pr", "issue", "discussion"]) class GithubLoader(BaseLoader): - def load_data(self, repo_url): - """Load data from a git repo.""" + """Load data from github search query.""" + + def __init__(self, config: Optional[Dict[str, Any]] = None): + super().__init__() + if not config: + raise ValueError( + "GithubLoader requires a personal access token to use github api. Check - `https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens#creating-a-personal-access-token-classic`" # noqa: E501 + ) + try: - from git import Repo + from github import Github except ImportError as e: raise ValueError( - "GithubLoader requires extra dependencies. Install with `pip install --upgrade 'embedchain[git]'`" + "GithubLoader requires extra dependencies. Install with `pip install --upgrade 'embedchain[github]'`" ) from e - mdx_loader = MdxLoader() - json_loader = JSONLoader() - data = [] - data_urls = [] + self.config = config + token = config.get("token") + if not token: + raise ValueError( + "GithubLoader requires a personal access token to use github api. 
Check - `https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens#creating-a-personal-access-token-classic`" # noqa: E501 + ) + + try: + self.client = Github(token) + except Exception as e: + logging.error(f"GithubLoader failed to initialize client: {e}") + self.client = None + + def _github_search_code(self, query: str): + """Search github code.""" + data = [] + results = self.client.search_code(query) + for result in tqdm(results, total=results.totalCount, desc="Loading code files from github"): + url = result.html_url + logging.info(f"Added data from url: {url}") + content = result.decoded_content.decode("utf-8") + metadata = { + "url": url, + } + data.append( + { + "content": clean_string(content), + "meta_data": metadata, + } + ) + return data + + def _get_github_repo_data(self, repo_url: str): + local_hash = hashlib.sha256(repo_url.encode()).hexdigest() + local_path = f"/tmp/{local_hash}" + data = [] + + def _get_repo_tree(repo_url: str, local_path: str): + try: + from git import Repo + except ImportError as e: + raise ValueError( + "GithubLoader requires extra dependencies. Install with `pip install --upgrade 'embedchain[github]'`" # noqa: E501 + ) from e - def _fetch_or_clone_repo(repo_url: str, local_path: str): if os.path.exists(local_path): logging.info("Repository already exists. Fetching updates...") repo = Repo(local_path) - origin = repo.remotes.origin - origin.fetch() logging.info("Fetch completed.") else: logging.info("Cloning repository...") Repo.clone_from(repo_url, local_path) + repo = Repo.clone_from(repo_url, local_path) logging.info("Clone completed.") + return repo.head.commit.tree - def _load_file(file_path: str): - try: - data_type = detect_datatype(file_path).value - except Exception: - data_type = "unstructured" - - if data_type == "mdx": - data = mdx_loader.load_data(file_path) - elif data_type == "json": - data = json_loader.load_data(file_path) - else: - data = _load_file_data(file_path) - - return data.get("data", []) - - def _is_file_empty(file_path): - return os.path.getsize(file_path) == 0 - - def _is_whitelisted(file_path): - whitelisted_extensions = ["md", "txt", "html", "json", "py", "js", "jsx", "ts", "tsx", "mdx", "rst"] - _, file_extension = os.path.splitext(file_path) - return file_extension[1:] in whitelisted_extensions - - def _add_repo_files(repo_path: str): - with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor: - future_to_file = { - executor.submit(_load_file, os.path.join(root, filename)): os.path.join(root, filename) - for root, _, files in os.walk(repo_path) - for filename in files - if _is_whitelisted(os.path.join(root, filename)) - and not _is_file_empty(os.path.join(root, filename)) # noqa:E501 - } - for future in tqdm(concurrent.futures.as_completed(future_to_file), total=len(future_to_file)): - file = future_to_file[future] + def _get_repo_tree_contents(repo_path, tree, progress_bar): + for subtree in tree: + if subtree.type == "tree": + _get_repo_tree_contents(repo_path, subtree, progress_bar) + else: + assert subtree.type == "blob" try: - results = future.result() - if results: - data.extend(results) - data_urls.extend([result.get("meta_data").get("url") for result in results]) - except Exception as e: - logging.warn(f"Failed to process {file}: {e}") + contents = subtree.data_stream.read().decode("utf-8") + except Exception: + logging.warning(f"Failed to read file: {subtree.path}") + progress_bar.update(1) if progress_bar else None + continue + + url = 
f"{repo_url.rstrip('.git')}/blob/main/{subtree.path}" + data.append( + { + "content": clean_string(contents), + "meta_data": { + "url": url, + }, + } + ) + if progress_bar is not None: + progress_bar.update(1) + + repo_tree = _get_repo_tree(repo_url, local_path) + tree_list = list(repo_tree.traverse()) + with tqdm(total=len(tree_list), desc="Loading files:", unit="item") as progress_bar: + _get_repo_tree_contents(local_path, repo_tree, progress_bar) + + return data + + def _github_search_repo(self, query: str): + """Search github repo.""" + data = [] + logging.info(f"Searching github repos with query: {query}") + results = self.client.search_repositories(query) + # Add repo urls and descriptions + urls = list(map(lambda x: x.html_url, results)) + discriptions = list(map(lambda x: x.description, results)) + data.append( + { + "content": clean_string(desc), + "meta_data": { + "url": url, + }, + } + for url, desc in zip(urls, discriptions) + ) + + # Add repo contents + for result in results: + clone_url = result.clone_url + logging.info(f"Cloning repository: {clone_url}") + data = self._get_github_repo_data(clone_url) + return data + + def _github_search_issues_and_pr(self, query: str, type: str): + """Search github issues and PRs.""" + data = [] + + query = f"{query} is:{type}" + logging.info(f"Searching github for query: {query}") + + results = self.client.search_issues(query) + + logging.info(f"Total results: {results.totalCount}") + for result in tqdm(results, total=results.totalCount, desc=f"Loading {type} from github"): + url = result.html_url + title = result.title + body = result.body + if not body: + logging.warn(f"Skipping issue because empty content for: {url}") + continue + labels = " ".join([label.name for label in result.labels]) + issue_comments = result.get_comments() + comments = [] + comments_created_at = [] + for comment in issue_comments: + comments_created_at.append(str(comment.created_at)) + comments.append(f"{comment.user.name}:{comment.body}") + content = "\n".join([title, labels, body, *comments]) + metadata = { + "url": url, + "created_at": str(result.created_at), + "comments_created_at": " ".join(comments_created_at), + } + data.append( + { + "content": clean_string(content), + "meta_data": metadata, + } + ) + return data + + # need to test more for discussion + def _github_search_discussions(self, query: str): + """Search github discussions.""" + data = [] + + query = f"{query} is:discussion" + logging.info(f"Searching github repo for query: {query}") + repos_results = self.client.search_repositories(query) + logging.info(f"Total repos found: {repos_results.totalCount}") + for repo_result in tqdm(repos_results, total=repos_results.totalCount, desc="Loading discussions from github"): + teams = repo_result.get_teams() + # import pdb; pdb.set_trace() + for team in teams: + team_discussions = team.get_discussions() + for discussion in team_discussions: + url = discussion.html_url + title = discussion.title + body = discussion.body + if not body: + logging.warn(f"Skipping discussion because empty content for: {url}") + continue + comments = [] + comments_created_at = [] + print("Discussion comments: ", discussion.comments_url) + content = "\n".join([title, body, *comments]) + metadata = { + "url": url, + "created_at": str(discussion.created_at), + "comments_created_at": " ".join(comments_created_at), + } + data.append( + { + "content": clean_string(content), + "meta_data": metadata, + } + ) + return data + + def _search_github_data(self, search_type: str, query: str): + 
"""Search github data.""" + if search_type == "code": + data = self._github_search_code(query) + elif search_type == "repo": + data = self._github_search_repo(query) + elif search_type == "issue": + data = self._github_search_issues_and_pr(query, search_type) + elif search_type == "pr": + data = self._github_search_issues_and_pr(query, search_type) + elif search_type == "discussion": + raise ValueError("GithubLoader does not support searching discussions yet.") + + return data + + def _get_valid_github_query(self, query: str): + """Check if query is valid and return search types and valid github query.""" + query_terms = shlex.split(query) + # query must provide repo to load data from + if len(query_terms) < 1 or "repo:" not in query: + raise ValueError( + "GithubLoader requires a search query with `repo:` term. Refer docs - `https://docs.embedchain.ai/data-sources/github`" # noqa: E501 + ) + + github_query = [] + types = set() + type_pattern = r"type:([a-zA-Z,]+)" + for term in query_terms: + term_match = re.search(type_pattern, term) + if term_match: + search_types = term_match.group(1).split(",") + types.update(search_types) + else: + github_query.append(term) + + # query must provide search type + if len(types) == 0: + raise ValueError( + "GithubLoader requires a search query with `type:` term. Refer docs - `https://docs.embedchain.ai/data-sources/github`" # noqa: E501 + ) + + for search_type in search_types: + if search_type not in VALID_SEARCH_TYPES: + raise ValueError( + f"Invalid search type: {search_type}. Valid types are: {', '.join(VALID_SEARCH_TYPES)}" + ) + + query = " ".join(github_query) + + return types, query + + def load_data(self, search_query: str, max_results: int = 1000): + """Load data from github search query.""" + + if not self.client: + raise ValueError( + "GithubLoader client is not initialized, data will not be loaded. Refer docs - `https://docs.embedchain.ai/data-sources/github`" # noqa: E501 + ) + + search_types, query = self._get_valid_github_query(search_query) + logging.info(f"Searching github for query: {query}, with types: {', '.join(search_types)}") + + data = [] + + with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor: + futures_map = executor.map(self._search_github_data, search_types, [query] * len(search_types)) + for search_data in tqdm(futures_map, total=len(search_types), desc="Searching data from github"): + data.extend(search_data) - source_hash = hashlib.sha256(repo_url.encode()).hexdigest() - repo_path = f"/tmp/{source_hash}" - _fetch_or_clone_repo(repo_url=repo_url, local_path=repo_path) - _add_repo_files(repo_path) - doc_id = hashlib.sha256((repo_url + ", ".join(data_urls)).encode()).hexdigest() return { - "doc_id": doc_id, + "doc_id": hashlib.sha256(query.encode()).hexdigest(), "data": data, } diff --git a/poetry.lock b/poetry.lock index b8305abb..573aef51 100644 --- a/poetry.lock +++ b/poetry.lock @@ -337,6 +337,26 @@ description = "The uncompromising code formatter." 
optional = false python-versions = ">=3.8" files = [ + {file = "black-23.9.1-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:d6bc09188020c9ac2555a498949401ab35bb6bf76d4e0f8ee251694664df6301"}, + {file = "black-23.9.1-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:13ef033794029b85dfea8032c9d3b92b42b526f1ff4bf13b2182ce4e917f5100"}, + {file = "black-23.9.1-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:75a2dc41b183d4872d3a500d2b9c9016e67ed95738a3624f4751a0cb4818fe71"}, + {file = "black-23.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13a2e4a93bb8ca74a749b6974925c27219bb3df4d42fc45e948a5d9feb5122b7"}, + {file = "black-23.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:adc3e4442eef57f99b5590b245a328aad19c99552e0bdc7f0b04db6656debd80"}, + {file = "black-23.9.1-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:8431445bf62d2a914b541da7ab3e2b4f3bc052d2ccbf157ebad18ea126efb91f"}, + {file = "black-23.9.1-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:8fc1ddcf83f996247505db6b715294eba56ea9372e107fd54963c7553f2b6dfe"}, + {file = "black-23.9.1-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:7d30ec46de88091e4316b17ae58bbbfc12b2de05e069030f6b747dfc649ad186"}, + {file = "black-23.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:031e8c69f3d3b09e1aa471a926a1eeb0b9071f80b17689a655f7885ac9325a6f"}, + {file = "black-23.9.1-cp311-cp311-win_amd64.whl", hash = "sha256:538efb451cd50f43aba394e9ec7ad55a37598faae3348d723b59ea8e91616300"}, + {file = "black-23.9.1-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:638619a559280de0c2aa4d76f504891c9860bb8fa214267358f0a20f27c12948"}, + {file = "black-23.9.1-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:a732b82747235e0542c03bf352c126052c0fbc458d8a239a94701175b17d4855"}, + {file = "black-23.9.1-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:cf3a4d00e4cdb6734b64bf23cd4341421e8953615cba6b3670453737a72ec204"}, + {file = "black-23.9.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf99f3de8b3273a8317681d8194ea222f10e0133a24a7548c73ce44ea1679377"}, + {file = "black-23.9.1-cp38-cp38-win_amd64.whl", hash = "sha256:14f04c990259576acd093871e7e9b14918eb28f1866f91968ff5524293f9c573"}, + {file = "black-23.9.1-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:c619f063c2d68f19b2d7270f4cf3192cb81c9ec5bc5ba02df91471d0b88c4c5c"}, + {file = "black-23.9.1-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:6a3b50e4b93f43b34a9d3ef00d9b6728b4a722c997c99ab09102fd5efdb88325"}, + {file = "black-23.9.1-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:c46767e8df1b7beefb0899c4a95fb43058fa8500b6db144f4ff3ca38eb2f6393"}, + {file = "black-23.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50254ebfa56aa46a9fdd5d651f9637485068a1adf42270148cd101cdf56e0ad9"}, + {file = "black-23.9.1-cp39-cp39-win_amd64.whl", hash = "sha256:403397c033adbc45c2bd41747da1f7fc7eaa44efbee256b53842470d4ac5a70f"}, {file = "black-23.9.1-py3-none-any.whl", hash = "sha256:6ccd59584cc834b6d127628713e4b6b968e5f79572da66284532525a042549f9"}, {file = "black-23.9.1.tar.gz", hash = "sha256:24b6b3ff5c6d9ea08a8888f6977eae858e1f340d7260cf56d70a49823236b62d"}, ] @@ -2050,7 +2070,7 @@ files = [ {file = "greenlet-3.0.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0b72b802496cccbd9b31acea72b6f87e7771ccfd7f7927437d592e5c92ed703c"}, {file = "greenlet-3.0.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:527cd90ba3d8d7ae7dceb06fda619895768a46a1b4e423bdb24c1969823b8362"}, {file = "greenlet-3.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:37f60b3a42d8b5499be910d1267b24355c495064f271cfe74bf28b17b099133c"}, - {file = "greenlet-3.0.0-cp311-universal2-macosx_10_9_universal2.whl", hash = "sha256:c3692ecf3fe754c8c0f2c95ff19626584459eab110eaab66413b1e7425cd84e9"}, + {file = "greenlet-3.0.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:1482fba7fbed96ea7842b5a7fc11d61727e8be75a077e603e8ab49d24e234383"}, {file = "greenlet-3.0.0-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:be557119bf467d37a8099d91fbf11b2de5eb1fd5fc5b91598407574848dc910f"}, {file = "greenlet-3.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:73b2f1922a39d5d59cc0e597987300df3396b148a9bd10b76a058a2f2772fc04"}, {file = "greenlet-3.0.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d1e22c22f7826096ad503e9bb681b05b8c1f5a8138469b255eb91f26a76634f2"}, @@ -2060,7 +2080,6 @@ files = [ {file = "greenlet-3.0.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:952256c2bc5b4ee8df8dfc54fc4de330970bf5d79253c863fb5e6761f00dda35"}, {file = "greenlet-3.0.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:269d06fa0f9624455ce08ae0179430eea61085e3cf6457f05982b37fd2cefe17"}, {file = "greenlet-3.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:9adbd8ecf097e34ada8efde9b6fec4dd2a903b1e98037adf72d12993a1c80b51"}, - {file = "greenlet-3.0.0-cp312-universal2-macosx_10_9_universal2.whl", hash = "sha256:553d6fb2324e7f4f0899e5ad2c427a4579ed4873f42124beba763f16032959af"}, {file = "greenlet-3.0.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c6b5ce7f40f0e2f8b88c28e6691ca6806814157ff05e794cdd161be928550f4c"}, {file = "greenlet-3.0.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ecf94aa539e97a8411b5ea52fc6ccd8371be9550c4041011a091eb8b3ca1d810"}, {file = "greenlet-3.0.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:80dcd3c938cbcac986c5c92779db8e8ce51a89a849c135172c88ecbdc8c056b7"}, @@ -3106,6 +3125,16 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = 
"sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -4829,6 +4858,23 @@ files = [ [package.dependencies] typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" +[[package]] +name = "pygithub" +version = "1.59.1" +description = "Use the full Github API v3" +optional = true +python-versions = ">=3.7" +files = [ + {file = "PyGithub-1.59.1-py3-none-any.whl", hash = "sha256:3d87a822e6c868142f0c2c4bf16cce4696b5a7a4d142a7bd160e1bdf75bc54a9"}, + {file = "PyGithub-1.59.1.tar.gz", hash = "sha256:c44e3a121c15bf9d3a5cc98d94c9a047a5132a9b01d22264627f58ade9ddc217"}, +] + +[package.dependencies] +deprecated = "*" +pyjwt = {version = ">=2.4.0", extras = ["crypto"]} +pynacl = ">=1.4.0" +requests = ">=2.14.0" + [[package]] name = "pyjwt" version = "2.8.0" @@ -4840,6 +4886,9 @@ files = [ {file = "PyJWT-2.8.0.tar.gz", hash = "sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de"}, ] +[package.dependencies] +cryptography = {version = ">=3.4.0", optional = true, markers = "extra == \"crypto\""} + [package.extras] crypto = ["cryptography (>=3.4.0)"] dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pytest (>=6.0.0,<7.0.0)", "sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] @@ -4866,6 +4915,32 @@ protobuf = ">=3.20.0" requests = "*" ujson = ">=2.0.0" +[[package]] +name = "pynacl" +version = "1.5.0" +description = "Python binding to the Networking and Cryptography (NaCl) library" +optional = true +python-versions = ">=3.6" +files = [ + {file = "PyNaCl-1.5.0-cp36-abi3-macosx_10_10_universal2.whl", hash = "sha256:401002a4aaa07c9414132aaed7f6836ff98f59277a234704ff66878c2ee4a0d1"}, + {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:52cb72a79269189d4e0dc537556f4740f7f0a9ec41c1322598799b0bdad4ef92"}, + {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a36d4a9dda1f19ce6e03c9a784a2921a4b726b02e1c736600ca9c22029474394"}, + {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0c84947a22519e013607c9be43706dd42513f9e6ae5d39d3613ca1e142fba44d"}, + {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06b8f6fa7f5de8d5d2f7573fe8c863c051225a27b61e6860fd047b1775807858"}, + {file = "PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = 
"sha256:a422368fc821589c228f4c49438a368831cb5bbc0eab5ebe1d7fac9dded6567b"}, + {file = "PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:61f642bf2378713e2c2e1de73444a3778e5f0a38be6fee0fe532fe30060282ff"}, + {file = "PyNaCl-1.5.0-cp36-abi3-win32.whl", hash = "sha256:e46dae94e34b085175f8abb3b0aaa7da40767865ac82c928eeb9e57e1ea8a543"}, + {file = "PyNaCl-1.5.0-cp36-abi3-win_amd64.whl", hash = "sha256:20f42270d27e1b6a29f54032090b972d97f0a1b0948cc52392041ef7831fee93"}, + {file = "PyNaCl-1.5.0.tar.gz", hash = "sha256:8ac7448f09ab85811607bdd21ec2464495ac8b7c66d146bf545b0f08fb9220ba"}, +] + +[package.dependencies] +cffi = ">=1.4.1" + +[package.extras] +docs = ["sphinx (>=1.6.5)", "sphinx-rtd-theme"] +tests = ["hypothesis (>=3.27.0)", "pytest (>=3.2.1,!=3.3.0)"] + [[package]] name = "pypandoc" version = "1.11" @@ -5215,6 +5290,7 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -5222,8 +5298,15 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = 
"sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -5240,6 +5323,7 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -5247,6 +5331,7 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -5774,6 +5859,11 @@ files = [ {file = "scikit_learn-1.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f66eddfda9d45dd6cadcd706b65669ce1df84b8549875691b1f403730bdef217"}, {file = "scikit_learn-1.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6448c37741145b241eeac617028ba6ec2119e1339b1385c9720dae31367f2be"}, {file = "scikit_learn-1.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:c413c2c850241998168bbb3bd1bb59ff03b1195a53864f0b80ab092071af6028"}, + {file = "scikit_learn-1.3.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ef540e09873e31569bc8b02c8a9f745ee04d8e1263255a15c9969f6f5caa627f"}, + {file = "scikit_learn-1.3.1-cp312-cp312-macosx_12_0_arm64.whl", hash = 
"sha256:9147a3a4df4d401e618713880be023e36109c85d8569b3bf5377e6cd3fecdeac"}, + {file = "scikit_learn-1.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2cd3634695ad192bf71645702b3df498bd1e246fc2d529effdb45a06ab028b4"}, + {file = "scikit_learn-1.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c275a06c5190c5ce00af0acbb61c06374087949f643ef32d355ece12c4db043"}, + {file = "scikit_learn-1.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:0e1aa8f206d0de814b81b41d60c1ce31f7f2c7354597af38fae46d9c47c45122"}, {file = "scikit_learn-1.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:52b77cc08bd555969ec5150788ed50276f5ef83abb72e6f469c5b91a0009bbca"}, {file = "scikit_learn-1.3.1-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:a683394bc3f80b7c312c27f9b14ebea7766b1f0a34faf1a2e9158d80e860ec26"}, {file = "scikit_learn-1.3.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a15d964d9eb181c79c190d3dbc2fff7338786bf017e9039571418a1d53dab236"}, @@ -6088,13 +6178,54 @@ description = "Database Abstraction Library" optional = false python-versions = ">=3.7" files = [ + {file = "SQLAlchemy-2.0.22-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f146c61ae128ab43ea3a0955de1af7e1633942c2b2b4985ac51cc292daf33222"}, + {file = "SQLAlchemy-2.0.22-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:875de9414393e778b655a3d97d60465eb3fae7c919e88b70cc10b40b9f56042d"}, + {file = "SQLAlchemy-2.0.22-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:13790cb42f917c45c9c850b39b9941539ca8ee7917dacf099cc0b569f3d40da7"}, + {file = "SQLAlchemy-2.0.22-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e04ab55cf49daf1aeb8c622c54d23fa4bec91cb051a43cc24351ba97e1dd09f5"}, + {file = "SQLAlchemy-2.0.22-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:a42c9fa3abcda0dcfad053e49c4f752eef71ecd8c155221e18b99d4224621176"}, + {file = "SQLAlchemy-2.0.22-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:14cd3bcbb853379fef2cd01e7c64a5d6f1d005406d877ed9509afb7a05ff40a5"}, + {file = "SQLAlchemy-2.0.22-cp310-cp310-win32.whl", hash = "sha256:d143c5a9dada696bcfdb96ba2de4a47d5a89168e71d05a076e88a01386872f97"}, + {file = "SQLAlchemy-2.0.22-cp310-cp310-win_amd64.whl", hash = "sha256:ccd87c25e4c8559e1b918d46b4fa90b37f459c9b4566f1dfbce0eb8122571547"}, {file = "SQLAlchemy-2.0.22-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4f6ff392b27a743c1ad346d215655503cec64405d3b694228b3454878bf21590"}, {file = "SQLAlchemy-2.0.22-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f776c2c30f0e5f4db45c3ee11a5f2a8d9de68e81eb73ec4237de1e32e04ae81c"}, + {file = "SQLAlchemy-2.0.22-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c8f1792d20d2f4e875ce7a113f43c3561ad12b34ff796b84002a256f37ce9437"}, + {file = "SQLAlchemy-2.0.22-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d80eeb5189d7d4b1af519fc3f148fe7521b9dfce8f4d6a0820e8f5769b005051"}, + {file = "SQLAlchemy-2.0.22-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:69fd9e41cf9368afa034e1c81f3570afb96f30fcd2eb1ef29cb4d9371c6eece2"}, + {file = "SQLAlchemy-2.0.22-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:54bcceaf4eebef07dadfde424f5c26b491e4a64e61761dea9459103ecd6ccc95"}, + {file = "SQLAlchemy-2.0.22-cp311-cp311-win32.whl", hash = "sha256:7ee7ccf47aa503033b6afd57efbac6b9e05180f492aeed9fcf70752556f95624"}, + {file = "SQLAlchemy-2.0.22-cp311-cp311-win_amd64.whl", hash = 
"sha256:b560f075c151900587ade06706b0c51d04b3277c111151997ea0813455378ae0"}, {file = "SQLAlchemy-2.0.22-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:2c9bac865ee06d27a1533471405ad240a6f5d83195eca481f9fc4a71d8b87df8"}, {file = "SQLAlchemy-2.0.22-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:625b72d77ac8ac23da3b1622e2da88c4aedaee14df47c8432bf8f6495e655de2"}, + {file = "SQLAlchemy-2.0.22-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b39a6e21110204a8c08d40ff56a73ba542ec60bab701c36ce721e7990df49fb9"}, + {file = "SQLAlchemy-2.0.22-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:53a766cb0b468223cafdf63e2d37f14a4757476157927b09300c8c5832d88560"}, + {file = "SQLAlchemy-2.0.22-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0e1ce8ebd2e040357dde01a3fb7d30d9b5736b3e54a94002641dfd0aa12ae6ce"}, + {file = "SQLAlchemy-2.0.22-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:505f503763a767556fa4deae5194b2be056b64ecca72ac65224381a0acab7ebe"}, + {file = "SQLAlchemy-2.0.22-cp312-cp312-win32.whl", hash = "sha256:154a32f3c7b00de3d090bc60ec8006a78149e221f1182e3edcf0376016be9396"}, + {file = "SQLAlchemy-2.0.22-cp312-cp312-win_amd64.whl", hash = "sha256:129415f89744b05741c6f0b04a84525f37fbabe5dc3774f7edf100e7458c48cd"}, {file = "SQLAlchemy-2.0.22-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3940677d341f2b685a999bffe7078697b5848a40b5f6952794ffcf3af150c301"}, + {file = "SQLAlchemy-2.0.22-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55914d45a631b81a8a2cb1a54f03eea265cf1783241ac55396ec6d735be14883"}, + {file = "SQLAlchemy-2.0.22-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2096d6b018d242a2bcc9e451618166f860bb0304f590d205173d317b69986c95"}, + {file = "SQLAlchemy-2.0.22-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:19c6986cf2fb4bc8e0e846f97f4135a8e753b57d2aaaa87c50f9acbe606bd1db"}, + {file = "SQLAlchemy-2.0.22-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:6ac28bd6888fe3c81fbe97584eb0b96804bd7032d6100b9701255d9441373ec1"}, + {file = "SQLAlchemy-2.0.22-cp37-cp37m-win32.whl", hash = "sha256:cb9a758ad973e795267da334a92dd82bb7555cb36a0960dcabcf724d26299db8"}, + {file = "SQLAlchemy-2.0.22-cp37-cp37m-win_amd64.whl", hash = "sha256:40b1206a0d923e73aa54f0a6bd61419a96b914f1cd19900b6c8226899d9742ad"}, {file = "SQLAlchemy-2.0.22-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3aa1472bf44f61dd27987cd051f1c893b7d3b17238bff8c23fceaef4f1133868"}, {file = "SQLAlchemy-2.0.22-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:56a7e2bb639df9263bf6418231bc2a92a773f57886d371ddb7a869a24919face"}, + {file = "SQLAlchemy-2.0.22-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ccca778c0737a773a1ad86b68bda52a71ad5950b25e120b6eb1330f0df54c3d0"}, + {file = "SQLAlchemy-2.0.22-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c6c3e9350f9fb16de5b5e5fbf17b578811a52d71bb784cc5ff71acb7de2a7f9"}, + {file = "SQLAlchemy-2.0.22-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:564e9f9e4e6466273dbfab0e0a2e5fe819eec480c57b53a2cdee8e4fdae3ad5f"}, + {file = "SQLAlchemy-2.0.22-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:af66001d7b76a3fab0d5e4c1ec9339ac45748bc4a399cbc2baa48c1980d3c1f4"}, + {file = "SQLAlchemy-2.0.22-cp38-cp38-win32.whl", hash = "sha256:9e55dff5ec115316dd7a083cdc1a52de63693695aecf72bc53a8e1468ce429e5"}, + {file = "SQLAlchemy-2.0.22-cp38-cp38-win_amd64.whl", hash = 
"sha256:4e869a8ff7ee7a833b74868a0887e8462445ec462432d8cbeff5e85f475186da"}, + {file = "SQLAlchemy-2.0.22-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9886a72c8e6371280cb247c5d32c9c8fa141dc560124348762db8a8b236f8692"}, + {file = "SQLAlchemy-2.0.22-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a571bc8ac092a3175a1d994794a8e7a1f2f651e7c744de24a19b4f740fe95034"}, + {file = "SQLAlchemy-2.0.22-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8db5ba8b7da759b727faebc4289a9e6a51edadc7fc32207a30f7c6203a181592"}, + {file = "SQLAlchemy-2.0.22-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b0b3f2686c3f162123adba3cb8b626ed7e9b8433ab528e36ed270b4f70d1cdb"}, + {file = "SQLAlchemy-2.0.22-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0c1fea8c0abcb070ffe15311853abfda4e55bf7dc1d4889497b3403629f3bf00"}, + {file = "SQLAlchemy-2.0.22-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4bb062784f37b2d75fd9b074c8ec360ad5df71f933f927e9e95c50eb8e05323c"}, + {file = "SQLAlchemy-2.0.22-cp39-cp39-win32.whl", hash = "sha256:58a3aba1bfb32ae7af68da3f277ed91d9f57620cf7ce651db96636790a78b736"}, + {file = "SQLAlchemy-2.0.22-cp39-cp39-win_amd64.whl", hash = "sha256:92e512a6af769e4725fa5b25981ba790335d42c5977e94ded07db7d641490a85"}, + {file = "SQLAlchemy-2.0.22-py3-none-any.whl", hash = "sha256:3076740335e4aaadd7deb3fe6dcb96b3015f1613bd190a4e1634e1b99b02ec86"}, {file = "SQLAlchemy-2.0.22.tar.gz", hash = "sha256:5434cc601aa17570d79e5377f5fd45ff92f9379e2abed0be5e8c2fba8d353d2b"}, ] @@ -7543,7 +7674,7 @@ community = ["llama-hub"] dataloaders = ["beautifulsoup4", "docx2txt", "duckduckgo-search", "pypdf", "pytube", "sentence-transformers", "unstructured", "youtube-transcript-api"] discord = ["discord"] elasticsearch = ["elasticsearch"] -git = ["gitpython"] +github = ["PyGithub", "gitpython"] gmail = ["llama-hub", "requests"] huggingface-hub = ["huggingface_hub"] images = ["ftfy", "pillow", "regex", "torch", "torchvision"] @@ -7567,4 +7698,4 @@ youtube = ["youtube-transcript-api", "yt_dlp"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.12" -content-hash = "ea063cadfefd23d4c9b2a25c9096efe6bcedc367136d819ccb1fd2f510a91206" +content-hash = "776ae7f49adab8a5dc98f6fe7c2887d2e700fd2d7c447383ea81ef05a463c8f3" diff --git a/pyproject.toml b/pyproject.toml index a5cd09a6..4fe200fe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -136,6 +136,7 @@ psycopg-pool = { version = "^3.1.8", optional = true } mysql-connector-python = { version = "^8.1.0", optional = true } gitpython = { version = "^3.1.38", optional = true } yt_dlp = { version = "^2023.11.14", optional = true } +PyGithub = { version = "^1.59.1", optional = true } [tool.poetry.group.dev.dependencies] black = "^23.3.0" @@ -192,7 +193,7 @@ gmail = [ json = ["llama-hub"] postgres = ["psycopg", "psycopg-binary", "psycopg-pool"] mysql = ["mysql-connector-python"] -git = ["gitpython"] +github = ["PyGithub", "gitpython"] youtube = [ "yt_dlp", "youtube-transcript-api", diff --git a/tests/loaders/test_github.py b/tests/loaders/test_github.py new file mode 100644 index 00000000..fe728a89 --- /dev/null +++ b/tests/loaders/test_github.py @@ -0,0 +1,33 @@ +import pytest + +from embedchain.loaders.github import GithubLoader + + +@pytest.fixture +def mock_github_loader_config(): + return { + "token": "your_mock_token", + } + + +@pytest.fixture +def mock_github_loader(mocker, mock_github_loader_config): + mock_github = mocker.patch("github.Github") + _ = mock_github.return_value + return 
GithubLoader(config=mock_github_loader_config) + + +def test_github_loader_init(mocker, mock_github_loader_config): + mock_github = mocker.patch("github.Github") + GithubLoader(config=mock_github_loader_config) + mock_github.assert_called_once_with("your_mock_token") + + +def test_github_loader_init_empty_config(mocker): + with pytest.raises(ValueError, match="requires a personal access token"): + GithubLoader() + + +def test_github_loader_init_missing_token(): + with pytest.raises(ValueError, match="requires a personal access token"): + GithubLoader(config={})
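
The new test module above only exercises construction of the loader. As a rough illustration (not part of this patch), additional tests for the query parsing introduced in `_get_valid_github_query` could look like the sketch below; it reuses the `mock_github_loader` fixture and the imports from `tests/loaders/test_github.py`, and assumes the validation behaviour shown in the loader diff.

```Python
# Hypothetical follow-up tests, reusing the `mock_github_loader` fixture defined above.
def test_github_loader_query_validation(mock_github_loader):
    # A valid query yields the parsed search types plus the remaining GitHub query string.
    types, query = mock_github_loader._get_valid_github_query("repo:embedchain/embedchain type:repo,issue")
    assert types == {"repo", "issue"}
    assert query == "repo:embedchain/embedchain"


def test_github_loader_query_missing_qualifiers(mock_github_loader):
    # Queries without the required qualifiers are rejected.
    with pytest.raises(ValueError, match="`type:`"):
        mock_github_loader._get_valid_github_query("repo:embedchain/embedchain")
    with pytest.raises(ValueError, match="`repo:`"):
        mock_github_loader._get_valid_github_query("type:repo")
```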
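
Similarly, the issue/PR path can be unit-tested without hitting the GitHub API by stubbing the PyGithub client attached to the loader. The sketch below is illustrative only; it assumes the attributes read by `_github_search_issues_and_pr` in the diff above (`html_url`, `title`, `body`, `labels`, `get_comments`, `created_at`) and uses plain `MagicMock` objects for the search results.

```Python
# Hypothetical sketch: stub the client on the loader and check the issue formatting.
def test_github_loader_search_issues(mock_github_loader, mocker):
    issue = mocker.MagicMock()
    issue.html_url = "https://github.com/embedchain/embedchain/issues/1"
    issue.title = "Example issue"
    issue.body = "Example body"
    issue.labels = []
    issue.get_comments.return_value = []
    issue.created_at = "2023-12-01 00:00:00"

    results = mocker.MagicMock()
    results.totalCount = 1
    results.__iter__.return_value = iter([issue])
    mock_github_loader.client.search_issues.return_value = results

    data = mock_github_loader._github_search_issues_and_pr("repo:embedchain/embedchain", "issue")

    assert len(data) == 1
    assert data[0]["meta_data"]["url"] == issue.html_url
```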