[Misc] Minor fixes and refactor utils code (#1117)

This commit is contained in:
Deshraj Yadav
2024-01-05 14:01:56 +05:30
committed by GitHub
parent 94ba82f2a2
commit 5c1f78879f
29 changed files with 29 additions and 40 deletions

View File

@@ -23,7 +23,7 @@ from embedchain.helpers.json_serializable import register_deserializable
from embedchain.llm.base import BaseLlm
from embedchain.llm.openai import OpenAILlm
from embedchain.telemetry.posthog import AnonymousTelemetry
from embedchain.utils import validate_config
from embedchain.utils.misc import validate_config
from embedchain.vectordb.base import BaseVectorDB
from embedchain.vectordb.chroma import ChromaDB

View File

@@ -506,19 +506,6 @@ def runserver():
return
# Step 5: Install UI requirements and start the UI server
try:
os.chdir("ui")
subprocess.run(["yarn"], check=True)
ui_process = subprocess.Popen(["yarn", "dev"], stdout=None, stderr=None)
console.print("✅ [bold green]UI server started successfully.[/bold green]")
except Exception as e:
console.print(f"❌ [bold red]Failed to start the UI server: {e}[/bold red]")
# Wait for the subprocesses to complete
api_process.wait()
ui_process.wait()
# Step 6: Install UI requirements and start the UI server
try:
os.chdir("ui")
subprocess.run(["yarn"], check=True)

View File

@@ -19,7 +19,7 @@ from embedchain.llm.base import BaseLlm
from embedchain.loaders.base_loader import BaseLoader
from embedchain.models.data_type import DataType, DirectDataType, IndirectDataType, SpecialDataType
from embedchain.telemetry.posthog import AnonymousTelemetry
from embedchain.utils import detect_datatype, is_valid_json_string
from embedchain.utils.misc import detect_datatype, is_valid_json_string
from embedchain.vectordb.base import BaseVectorDB
load_dotenv()

View File

@@ -5,7 +5,7 @@ from embedchain.config.embedder.base import BaseEmbedderConfig
try:
from chromadb.api.types import Embeddable, EmbeddingFunction, Embeddings
except RuntimeError:
from embedchain.utils import use_pysqlite3
from embedchain.utils.misc import use_pysqlite3
use_pysqlite3()
from chromadb.api.types import Embeddable, EmbeddingFunction, Embeddings

View File

@@ -7,7 +7,7 @@ import requests
from embedchain.helpers.json_serializable import register_deserializable
from embedchain.loaders.base_loader import BaseLoader
from embedchain.utils import is_readable
from embedchain.utils.misc import is_readable
@register_deserializable

View File

@@ -8,7 +8,7 @@ from embedchain.data_formatter.data_formatter import DataFormatter
from embedchain.helpers.json_serializable import register_deserializable
from embedchain.loaders.base_loader import BaseLoader
from embedchain.loaders.text_file import TextFileLoader
from embedchain.utils import detect_datatype
from embedchain.utils.misc import detect_datatype
@register_deserializable

View File

@@ -6,7 +6,7 @@ from typing import Any, Dict, Optional
import requests
from embedchain.loaders.base_loader import BaseLoader
from embedchain.utils import clean_string
from embedchain.utils.misc import clean_string
class DiscourseLoader(BaseLoader):

View File

@@ -9,7 +9,7 @@ from typing import Any, Dict, Optional
from tqdm import tqdm
from embedchain.loaders.base_loader import BaseLoader
from embedchain.utils import clean_string
from embedchain.utils.misc import clean_string
GITHUB_URL = "https://github.com"
GITHUB_API_URL = "https://api.github.com"

View File

@@ -20,7 +20,7 @@ except ImportError:
) from None
from embedchain.loaders.base_loader import BaseLoader
from embedchain.utils import clean_string
from embedchain.utils.misc import clean_string
class GmailReader:

View File

@@ -8,7 +8,8 @@ except ImportError:
"Google Drive requires extra dependencies. Install with `pip install embedchain[googledrive]`"
) from None
from langchain.document_loaders import GoogleDriveLoader as Loader, UnstructuredFileIOLoader
from langchain.document_loaders import GoogleDriveLoader as Loader
from langchain.document_loaders import UnstructuredFileIOLoader
from embedchain.helpers.json_serializable import register_deserializable
from embedchain.loaders.base_loader import BaseLoader

View File

@@ -7,7 +7,7 @@ from typing import Dict, List, Union
import requests
from embedchain.loaders.base_loader import BaseLoader
from embedchain.utils import clean_string, is_valid_json_string
from embedchain.utils.misc import clean_string, is_valid_json_string
class JSONReader:

View File

@@ -3,7 +3,7 @@ import logging
from typing import Any, Dict, Optional
from embedchain.loaders.base_loader import BaseLoader
from embedchain.utils import clean_string
from embedchain.utils.misc import clean_string
class MySQLLoader(BaseLoader):

View File

@@ -7,7 +7,7 @@ import requests
from embedchain.helpers.json_serializable import register_deserializable
from embedchain.loaders.base_loader import BaseLoader
from embedchain.utils import clean_string
from embedchain.utils.misc import clean_string
class NotionDocument:

View File

@@ -8,7 +8,7 @@ except ImportError:
) from None
from embedchain.helpers.json_serializable import register_deserializable
from embedchain.loaders.base_loader import BaseLoader
from embedchain.utils import clean_string
from embedchain.utils.misc import clean_string
@register_deserializable

View File

@@ -7,7 +7,7 @@ from typing import Any, Dict, Optional
import certifi
from embedchain.loaders.base_loader import BaseLoader
from embedchain.utils import clean_string
from embedchain.utils.misc import clean_string
SLACK_API_BASE_URL = "https://www.slack.com/api/"

View File

@@ -7,7 +7,7 @@ import requests
from embedchain.helpers.json_serializable import register_deserializable
from embedchain.loaders.base_loader import BaseLoader
from embedchain.utils import is_readable
from embedchain.utils.misc import is_readable
@register_deserializable

View File

@@ -2,7 +2,7 @@ import hashlib
from embedchain.helpers.json_serializable import register_deserializable
from embedchain.loaders.base_loader import BaseLoader
from embedchain.utils import clean_string
from embedchain.utils.misc import clean_string
@register_deserializable

View File

@@ -12,7 +12,7 @@ except ImportError:
from embedchain.helpers.json_serializable import register_deserializable
from embedchain.loaders.base_loader import BaseLoader
from embedchain.utils import clean_string
from embedchain.utils.misc import clean_string
@register_deserializable

View File

@@ -8,7 +8,7 @@ except ImportError:
) from None
from embedchain.helpers.json_serializable import register_deserializable
from embedchain.loaders.base_loader import BaseLoader
from embedchain.utils import clean_string
from embedchain.utils.misc import clean_string
@register_deserializable

View File

@@ -8,7 +8,7 @@ except ImportError:
) from None
from embedchain.helpers.json_serializable import register_deserializable
from embedchain.loaders.base_loader import BaseLoader
from embedchain.utils import clean_string
from embedchain.utils.misc import clean_string
@register_deserializable

View File

@@ -15,7 +15,7 @@ from embedchain.config import AddConfig
from embedchain.data_formatter import DataFormatter
from embedchain.models.data_type import DataType
from embedchain.telemetry.posthog import AnonymousTelemetry
from embedchain.utils import detect_datatype
from embedchain.utils.misc import detect_datatype
logging.basicConfig(level=logging.WARN)

View File

View File

@@ -201,7 +201,8 @@ def detect_datatype(source: Any) -> DataType:
formatted_source = format_source(str(source), 30)
if url:
from langchain.document_loaders.youtube import ALLOWED_NETLOCK as YOUTUBE_ALLOWED_NETLOCS
from langchain.document_loaders.youtube import \
ALLOWED_NETLOCK as YOUTUBE_ALLOWED_NETLOCS
if url.netloc in YOUTUBE_ALLOWED_NETLOCS:
logging.debug(f"Source of `{formatted_source}` detected as `youtube_video`.")

View File

@@ -14,7 +14,7 @@ try:
from chromadb.config import Settings
from chromadb.errors import InvalidDimensionException
except RuntimeError:
from embedchain.utils import use_pysqlite3
from embedchain.utils.misc import use_pysqlite3
use_pysqlite3()
import chromadb

View File

@@ -11,7 +11,7 @@ except ImportError:
from embedchain.config import ElasticsearchDBConfig
from embedchain.helpers.json_serializable import register_deserializable
from embedchain.utils import chunks
from embedchain.utils.misc import chunks
from embedchain.vectordb.base import BaseVectorDB

View File

@@ -10,7 +10,7 @@ except ImportError:
from embedchain.config.vectordb.pinecone import PineconeDBConfig
from embedchain.helpers.json_serializable import register_deserializable
from embedchain.utils import chunks
from embedchain.utils.misc import chunks
from embedchain.vectordb.base import BaseVectorDB

View File

@@ -1,6 +1,6 @@
[tool.poetry]
name = "embedchain"
version = "0.1.52"
version = "0.1.53"
description = "Data platform for LLMs - Load, index, retrieve and sync any unstructured data"
authors = [
"Taranjeet Singh <taranjeet@embedchain.ai>",

View File

@@ -3,7 +3,7 @@ import unittest
from unittest.mock import patch
from embedchain.models.data_type import DataType
from embedchain.utils import detect_datatype
from embedchain.utils.misc import detect_datatype
class TestApp(unittest.TestCase):

View File

@@ -1,6 +1,6 @@
import yaml
from embedchain.utils import validate_config
from embedchain.utils.misc import validate_config
CONFIG_YAMLS = [
"configs/anthropic.yaml",