From 5c1f78879f82806587bf10e76c629c32a842252d Mon Sep 17 00:00:00 2001 From: Deshraj Yadav Date: Fri, 5 Jan 2024 14:01:56 +0530 Subject: [PATCH] [Misc] Minor fixes and refactor utils code (#1117) --- embedchain/app.py | 2 +- embedchain/cli.py | 13 ------------- embedchain/embedchain.py | 2 +- embedchain/embedder/base.py | 2 +- embedchain/loaders/beehiiv.py | 2 +- embedchain/loaders/directory_loader.py | 2 +- embedchain/loaders/discourse.py | 2 +- embedchain/loaders/github.py | 2 +- embedchain/loaders/gmail.py | 2 +- embedchain/loaders/google_drive.py | 3 ++- embedchain/loaders/json.py | 2 +- embedchain/loaders/mysql.py | 2 +- embedchain/loaders/notion.py | 2 +- embedchain/loaders/pdf_file.py | 2 +- embedchain/loaders/slack.py | 2 +- embedchain/loaders/substack.py | 2 +- embedchain/loaders/unstructured_file.py | 2 +- embedchain/loaders/web_page.py | 2 +- embedchain/loaders/xml.py | 2 +- embedchain/loaders/youtube_video.py | 2 +- embedchain/store/assistants.py | 2 +- embedchain/utils/__init__.py | 0 embedchain/{utils.py => utils/misc.py} | 3 ++- embedchain/vectordb/chroma.py | 2 +- embedchain/vectordb/elasticsearch.py | 2 +- embedchain/vectordb/pinecone.py | 2 +- pyproject.toml | 2 +- tests/embedchain/test_utils.py | 2 +- tests/test_utils.py | 2 +- 29 files changed, 29 insertions(+), 40 deletions(-) create mode 100644 embedchain/utils/__init__.py rename embedchain/{utils.py => utils/misc.py} (99%) diff --git a/embedchain/app.py b/embedchain/app.py index 288e5650..ad8afc3f 100644 --- a/embedchain/app.py +++ b/embedchain/app.py @@ -23,7 +23,7 @@ from embedchain.helpers.json_serializable import register_deserializable from embedchain.llm.base import BaseLlm from embedchain.llm.openai import OpenAILlm from embedchain.telemetry.posthog import AnonymousTelemetry -from embedchain.utils import validate_config +from embedchain.utils.misc import validate_config from embedchain.vectordb.base import BaseVectorDB from embedchain.vectordb.chroma import ChromaDB diff --git a/embedchain/cli.py b/embedchain/cli.py index 4037b65f..e6cca4c1 100644 --- a/embedchain/cli.py +++ b/embedchain/cli.py @@ -506,19 +506,6 @@ def runserver(): return # Step 5: Install UI requirements and start the UI server - try: - os.chdir("ui") - subprocess.run(["yarn"], check=True) - ui_process = subprocess.Popen(["yarn", "dev"], stdout=None, stderr=None) - console.print("✅ [bold green]UI server started successfully.[/bold green]") - except Exception as e: - console.print(f"❌ [bold red]Failed to start the UI server: {e}[/bold red]") - - # Wait for the subprocesses to complete - api_process.wait() - ui_process.wait() - - # Step 6: Install UI requirements and start the UI server try: os.chdir("ui") subprocess.run(["yarn"], check=True) diff --git a/embedchain/embedchain.py b/embedchain/embedchain.py index bc74d569..190d2f55 100644 --- a/embedchain/embedchain.py +++ b/embedchain/embedchain.py @@ -19,7 +19,7 @@ from embedchain.llm.base import BaseLlm from embedchain.loaders.base_loader import BaseLoader from embedchain.models.data_type import DataType, DirectDataType, IndirectDataType, SpecialDataType from embedchain.telemetry.posthog import AnonymousTelemetry -from embedchain.utils import detect_datatype, is_valid_json_string +from embedchain.utils.misc import detect_datatype, is_valid_json_string from embedchain.vectordb.base import BaseVectorDB load_dotenv() diff --git a/embedchain/embedder/base.py b/embedchain/embedder/base.py index fdad3b2e..60c53c03 100644 --- a/embedchain/embedder/base.py +++ b/embedchain/embedder/base.py @@ -5,7 +5,7 @@ from embedchain.config.embedder.base import BaseEmbedderConfig try: from chromadb.api.types import Embeddable, EmbeddingFunction, Embeddings except RuntimeError: - from embedchain.utils import use_pysqlite3 + from embedchain.utils.misc import use_pysqlite3 use_pysqlite3() from chromadb.api.types import Embeddable, EmbeddingFunction, Embeddings diff --git a/embedchain/loaders/beehiiv.py b/embedchain/loaders/beehiiv.py index f9cc920f..5169f7ae 100644 --- a/embedchain/loaders/beehiiv.py +++ b/embedchain/loaders/beehiiv.py @@ -7,7 +7,7 @@ import requests from embedchain.helpers.json_serializable import register_deserializable from embedchain.loaders.base_loader import BaseLoader -from embedchain.utils import is_readable +from embedchain.utils.misc import is_readable @register_deserializable diff --git a/embedchain/loaders/directory_loader.py b/embedchain/loaders/directory_loader.py index 9a4bc48c..2790724e 100644 --- a/embedchain/loaders/directory_loader.py +++ b/embedchain/loaders/directory_loader.py @@ -8,7 +8,7 @@ from embedchain.data_formatter.data_formatter import DataFormatter from embedchain.helpers.json_serializable import register_deserializable from embedchain.loaders.base_loader import BaseLoader from embedchain.loaders.text_file import TextFileLoader -from embedchain.utils import detect_datatype +from embedchain.utils.misc import detect_datatype @register_deserializable diff --git a/embedchain/loaders/discourse.py b/embedchain/loaders/discourse.py index d5b4da80..363bcb8a 100644 --- a/embedchain/loaders/discourse.py +++ b/embedchain/loaders/discourse.py @@ -6,7 +6,7 @@ from typing import Any, Dict, Optional import requests from embedchain.loaders.base_loader import BaseLoader -from embedchain.utils import clean_string +from embedchain.utils.misc import clean_string class DiscourseLoader(BaseLoader): diff --git a/embedchain/loaders/github.py b/embedchain/loaders/github.py index 7c54a59c..bae36973 100644 --- a/embedchain/loaders/github.py +++ b/embedchain/loaders/github.py @@ -9,7 +9,7 @@ from typing import Any, Dict, Optional from tqdm import tqdm from embedchain.loaders.base_loader import BaseLoader -from embedchain.utils import clean_string +from embedchain.utils.misc import clean_string GITHUB_URL = "https://github.com" GITHUB_API_URL = "https://api.github.com" diff --git a/embedchain/loaders/gmail.py b/embedchain/loaders/gmail.py index e87819e6..3487a20b 100644 --- a/embedchain/loaders/gmail.py +++ b/embedchain/loaders/gmail.py @@ -20,7 +20,7 @@ except ImportError: ) from None from embedchain.loaders.base_loader import BaseLoader -from embedchain.utils import clean_string +from embedchain.utils.misc import clean_string class GmailReader: diff --git a/embedchain/loaders/google_drive.py b/embedchain/loaders/google_drive.py index dde405aa..5db6c8b8 100644 --- a/embedchain/loaders/google_drive.py +++ b/embedchain/loaders/google_drive.py @@ -8,7 +8,8 @@ except ImportError: "Google Drive requires extra dependencies. Install with `pip install embedchain[googledrive]`" ) from None -from langchain.document_loaders import GoogleDriveLoader as Loader, UnstructuredFileIOLoader +from langchain.document_loaders import GoogleDriveLoader as Loader +from langchain.document_loaders import UnstructuredFileIOLoader from embedchain.helpers.json_serializable import register_deserializable from embedchain.loaders.base_loader import BaseLoader diff --git a/embedchain/loaders/json.py b/embedchain/loaders/json.py index 51cdd79e..2d090f68 100644 --- a/embedchain/loaders/json.py +++ b/embedchain/loaders/json.py @@ -7,7 +7,7 @@ from typing import Dict, List, Union import requests from embedchain.loaders.base_loader import BaseLoader -from embedchain.utils import clean_string, is_valid_json_string +from embedchain.utils.misc import clean_string, is_valid_json_string class JSONReader: diff --git a/embedchain/loaders/mysql.py b/embedchain/loaders/mysql.py index 63cfc040..3574b6c0 100644 --- a/embedchain/loaders/mysql.py +++ b/embedchain/loaders/mysql.py @@ -3,7 +3,7 @@ import logging from typing import Any, Dict, Optional from embedchain.loaders.base_loader import BaseLoader -from embedchain.utils import clean_string +from embedchain.utils.misc import clean_string class MySQLLoader(BaseLoader): diff --git a/embedchain/loaders/notion.py b/embedchain/loaders/notion.py index 4b45fc8b..d51753b2 100644 --- a/embedchain/loaders/notion.py +++ b/embedchain/loaders/notion.py @@ -7,7 +7,7 @@ import requests from embedchain.helpers.json_serializable import register_deserializable from embedchain.loaders.base_loader import BaseLoader -from embedchain.utils import clean_string +from embedchain.utils.misc import clean_string class NotionDocument: diff --git a/embedchain/loaders/pdf_file.py b/embedchain/loaders/pdf_file.py index 03495edb..fb97f324 100644 --- a/embedchain/loaders/pdf_file.py +++ b/embedchain/loaders/pdf_file.py @@ -8,7 +8,7 @@ except ImportError: ) from None from embedchain.helpers.json_serializable import register_deserializable from embedchain.loaders.base_loader import BaseLoader -from embedchain.utils import clean_string +from embedchain.utils.misc import clean_string @register_deserializable diff --git a/embedchain/loaders/slack.py b/embedchain/loaders/slack.py index cb785efb..44de53b5 100644 --- a/embedchain/loaders/slack.py +++ b/embedchain/loaders/slack.py @@ -7,7 +7,7 @@ from typing import Any, Dict, Optional import certifi from embedchain.loaders.base_loader import BaseLoader -from embedchain.utils import clean_string +from embedchain.utils.misc import clean_string SLACK_API_BASE_URL = "https://www.slack.com/api/" diff --git a/embedchain/loaders/substack.py b/embedchain/loaders/substack.py index 40278a6b..77903d17 100644 --- a/embedchain/loaders/substack.py +++ b/embedchain/loaders/substack.py @@ -7,7 +7,7 @@ import requests from embedchain.helpers.json_serializable import register_deserializable from embedchain.loaders.base_loader import BaseLoader -from embedchain.utils import is_readable +from embedchain.utils.misc import is_readable @register_deserializable diff --git a/embedchain/loaders/unstructured_file.py b/embedchain/loaders/unstructured_file.py index 294c596c..c9b9f7e8 100644 --- a/embedchain/loaders/unstructured_file.py +++ b/embedchain/loaders/unstructured_file.py @@ -2,7 +2,7 @@ import hashlib from embedchain.helpers.json_serializable import register_deserializable from embedchain.loaders.base_loader import BaseLoader -from embedchain.utils import clean_string +from embedchain.utils.misc import clean_string @register_deserializable diff --git a/embedchain/loaders/web_page.py b/embedchain/loaders/web_page.py index ecf03e9d..0995fc86 100644 --- a/embedchain/loaders/web_page.py +++ b/embedchain/loaders/web_page.py @@ -12,7 +12,7 @@ except ImportError: from embedchain.helpers.json_serializable import register_deserializable from embedchain.loaders.base_loader import BaseLoader -from embedchain.utils import clean_string +from embedchain.utils.misc import clean_string @register_deserializable diff --git a/embedchain/loaders/xml.py b/embedchain/loaders/xml.py index 00fe4770..5436d749 100644 --- a/embedchain/loaders/xml.py +++ b/embedchain/loaders/xml.py @@ -8,7 +8,7 @@ except ImportError: ) from None from embedchain.helpers.json_serializable import register_deserializable from embedchain.loaders.base_loader import BaseLoader -from embedchain.utils import clean_string +from embedchain.utils.misc import clean_string @register_deserializable diff --git a/embedchain/loaders/youtube_video.py b/embedchain/loaders/youtube_video.py index 2aa08024..4c1a8453 100644 --- a/embedchain/loaders/youtube_video.py +++ b/embedchain/loaders/youtube_video.py @@ -8,7 +8,7 @@ except ImportError: ) from None from embedchain.helpers.json_serializable import register_deserializable from embedchain.loaders.base_loader import BaseLoader -from embedchain.utils import clean_string +from embedchain.utils.misc import clean_string @register_deserializable diff --git a/embedchain/store/assistants.py b/embedchain/store/assistants.py index 87f6cb28..3c77d577 100644 --- a/embedchain/store/assistants.py +++ b/embedchain/store/assistants.py @@ -15,7 +15,7 @@ from embedchain.config import AddConfig from embedchain.data_formatter import DataFormatter from embedchain.models.data_type import DataType from embedchain.telemetry.posthog import AnonymousTelemetry -from embedchain.utils import detect_datatype +from embedchain.utils.misc import detect_datatype logging.basicConfig(level=logging.WARN) diff --git a/embedchain/utils/__init__.py b/embedchain/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/embedchain/utils.py b/embedchain/utils/misc.py similarity index 99% rename from embedchain/utils.py rename to embedchain/utils/misc.py index 43665d54..03ed5bb3 100644 --- a/embedchain/utils.py +++ b/embedchain/utils/misc.py @@ -201,7 +201,8 @@ def detect_datatype(source: Any) -> DataType: formatted_source = format_source(str(source), 30) if url: - from langchain.document_loaders.youtube import ALLOWED_NETLOCK as YOUTUBE_ALLOWED_NETLOCS + from langchain.document_loaders.youtube import \ + ALLOWED_NETLOCK as YOUTUBE_ALLOWED_NETLOCS if url.netloc in YOUTUBE_ALLOWED_NETLOCS: logging.debug(f"Source of `{formatted_source}` detected as `youtube_video`.") diff --git a/embedchain/vectordb/chroma.py b/embedchain/vectordb/chroma.py index 7763c207..27156ab7 100644 --- a/embedchain/vectordb/chroma.py +++ b/embedchain/vectordb/chroma.py @@ -14,7 +14,7 @@ try: from chromadb.config import Settings from chromadb.errors import InvalidDimensionException except RuntimeError: - from embedchain.utils import use_pysqlite3 + from embedchain.utils.misc import use_pysqlite3 use_pysqlite3() import chromadb diff --git a/embedchain/vectordb/elasticsearch.py b/embedchain/vectordb/elasticsearch.py index 62744f5d..c38e8d5e 100644 --- a/embedchain/vectordb/elasticsearch.py +++ b/embedchain/vectordb/elasticsearch.py @@ -11,7 +11,7 @@ except ImportError: from embedchain.config import ElasticsearchDBConfig from embedchain.helpers.json_serializable import register_deserializable -from embedchain.utils import chunks +from embedchain.utils.misc import chunks from embedchain.vectordb.base import BaseVectorDB diff --git a/embedchain/vectordb/pinecone.py b/embedchain/vectordb/pinecone.py index dd3da4ac..336967d6 100644 --- a/embedchain/vectordb/pinecone.py +++ b/embedchain/vectordb/pinecone.py @@ -10,7 +10,7 @@ except ImportError: from embedchain.config.vectordb.pinecone import PineconeDBConfig from embedchain.helpers.json_serializable import register_deserializable -from embedchain.utils import chunks +from embedchain.utils.misc import chunks from embedchain.vectordb.base import BaseVectorDB diff --git a/pyproject.toml b/pyproject.toml index e642799e..34021617 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "embedchain" -version = "0.1.52" +version = "0.1.53" description = "Data platform for LLMs - Load, index, retrieve and sync any unstructured data" authors = [ "Taranjeet Singh ", diff --git a/tests/embedchain/test_utils.py b/tests/embedchain/test_utils.py index 80365abd..22806b79 100644 --- a/tests/embedchain/test_utils.py +++ b/tests/embedchain/test_utils.py @@ -3,7 +3,7 @@ import unittest from unittest.mock import patch from embedchain.models.data_type import DataType -from embedchain.utils import detect_datatype +from embedchain.utils.misc import detect_datatype class TestApp(unittest.TestCase): diff --git a/tests/test_utils.py b/tests/test_utils.py index fa810643..3e50e1e1 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,6 +1,6 @@ import yaml -from embedchain.utils import validate_config +from embedchain.utils.misc import validate_config CONFIG_YAMLS = [ "configs/anthropic.yaml",