refactor: app design concept (#305)

This commit is contained in:
cachho
2023-07-18 01:20:26 +02:00
committed by GitHub
parent 7ed46260b3
commit 0ea278f633
16 changed files with 378 additions and 240 deletions

View File

@@ -1,74 +0,0 @@
import logging
import os
from chromadb.utils import embedding_functions
from embedchain.config.BaseConfig import BaseConfig
class InitConfig(BaseConfig):
    """
    Configuration object used to initialize an embedchain `App` instance.
    """

    def __init__(self, log_level=None, ef=None, db=None, host=None, port=None, id=None):
        """
        :param log_level: Optional. (String) Debug level
        ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'].
        :param ef: Optional. Embedding function to use.
        :param db: Optional. (Vector) database to use for embeddings.
        :param host: Optional. Hostname for the database server.
        :param port: Optional. Port for the database server.
        :param id: Optional. ID of the app. Document metadata will have this id.
        :raises ValueError: If `log_level` is not a valid logging level name.
        """
        # Logging is configured first so an invalid level fails before any
        # attribute is stored.
        self._setup_logging(log_level)
        self.ef, self.db = ef, db
        self.host, self.port = host, port
        self.id = id

    def _set_embedding_function(self, ef):
        """
        Stores `ef` as the embedding function, unconditionally.

        :param ef: Embedding function to use.
        """
        self.ef = ef

    def _set_embedding_function_to_default(self):
        """
        Sets embedding function to default (`text-embedding-ada-002`).

        :raises ValueError: If neither the OPENAI_API_KEY nor the
        OPENAI_ORGANIZATION environment variable is set.
        """
        api_key = os.getenv("OPENAI_API_KEY")
        organization = os.getenv("OPENAI_ORGANIZATION")
        # Only the case where BOTH variables are missing is rejected here.
        if api_key is None and organization is None:
            raise ValueError("OPENAI_API_KEY or OPENAI_ORGANIZATION environment variables not provided")  # noqa:E501
        self.ef = embedding_functions.OpenAIEmbeddingFunction(
            api_key=api_key,
            organization_id=organization,
            model_name="text-embedding-ada-002",
        )

    def _set_db(self, db):
        """
        Stores `db` as the database; a falsy `db` leaves the current value untouched.

        :param db: (Vector) database to use for embeddings.
        """
        if not db:
            return
        self.db = db

    def _set_db_to_default(self):
        """
        Sets database to default (`ChromaDb`), built from the currently stored
        embedding function, host and port.
        """
        # Imported at call time rather than at module import time.
        from embedchain.vectordb.chroma_db import ChromaDB

        self.db = ChromaDB(ef=self.ef, host=self.host, port=self.port)

    def _setup_logging(self, debug_level):
        """
        Configures the root logger and attaches a module logger to the instance.

        :param debug_level: Optional. (String) Name of a logging level, case
        insensitive. Falls back to WARNING when None.
        :raises ValueError: If `debug_level` does not name an integer level of
        the `logging` module.
        """
        if debug_level is None:
            level = logging.WARNING  # Default level
        else:
            level = getattr(logging, debug_level.upper(), None)
            if not isinstance(level, int):
                raise ValueError(f"Invalid log level: {debug_level}")

        logging.basicConfig(format="%(asctime)s [%(name)s] [%(levelname)s] %(message)s", level=level)
        self.logger = logging.getLogger(__name__)

View File

@@ -1,5 +1,7 @@
from .AddConfig import AddConfig, ChunkerConfig # noqa: F401
from .apps.AppConfig import AppConfig # noqa: F401
from .apps.CustomAppConfig import CustomAppConfig # noqa: F401
from .apps.OpenSourceAppConfig import OpenSourceAppConfig # noqa: F401
from .BaseConfig import BaseConfig # noqa: F401
from .ChatConfig import ChatConfig # noqa: F401
from .InitConfig import InitConfig # noqa: F401
from .QueryConfig import QueryConfig # noqa: F401

View File

@@ -0,0 +1,38 @@
import os
from chromadb.utils import embedding_functions
from .BaseAppConfig import BaseAppConfig
class AppConfig(BaseAppConfig):
    """
    Config to initialize an embedchain `App` instance.

    The embedding function is not configurable here; it is always the one
    returned by `default_embedding_function`.
    """

    def __init__(self, log_level=None, host=None, port=None, id=None):
        """
        :param log_level: Optional. (String) Debug level
        ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'].
        :param host: Optional. Hostname for the database server.
        :param port: Optional. Port for the database server.
        :param id: Optional. ID of the app. Document metadata will have this id.
        :raises ValueError: If the default embedding function cannot be created
        (see `default_embedding_function`).
        """
        embedding_fn = AppConfig.default_embedding_function()
        super().__init__(log_level=log_level, ef=embedding_fn, host=host, port=port, id=id)

    @staticmethod
    def default_embedding_function():
        """
        Builds the default embedding function (`text-embedding-ada-002`).

        :raises ValueError: If neither the OPENAI_API_KEY nor the
        OPENAI_ORGANIZATION environment variable is set.
        :returns: The default embedding function for the app class.
        """
        api_key = os.getenv("OPENAI_API_KEY")
        organization = os.getenv("OPENAI_ORGANIZATION")
        # Only the case where BOTH variables are missing is rejected here.
        if api_key is None and organization is None:
            raise ValueError("OPENAI_API_KEY or OPENAI_ORGANIZATION environment variables not provided")  # noqa:E501
        return embedding_functions.OpenAIEmbeddingFunction(
            api_key=api_key,
            organization_id=organization,
            model_name="text-embedding-ada-002",
        )

View File

@@ -0,0 +1,53 @@
import logging
from embedchain.config.BaseConfig import BaseConfig
class BaseAppConfig(BaseConfig):
    """
    Shared parent config for `App`, `OpenSourceApp` and `CustomApp` instances.
    """

    def __init__(self, log_level=None, ef=None, db=None, host=None, port=None, id=None):
        """
        :param log_level: Optional. (String) Debug level
        ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'].
        :param ef: Embedding function to use. Required when no `db` is given.
        :param db: Optional. (Vector) database instance to use for embeddings.
        :param host: Optional. Hostname for the database server.
        :param port: Optional. Port for the database server.
        :param id: Optional. ID of the app. Document metadata will have this id.
        :raises ValueError: If `log_level` is invalid, or if the default
        database is needed but `ef` is None.
        """
        self._setup_logging(log_level)
        # A falsy `db` means "not provided": fall back to the default database,
        # which is the only consumer of `ef`, `host` and `port` here.
        if not db:
            db = BaseAppConfig.default_db(ef=ef, host=host, port=port)
        self.db = db
        self.id = id

    @staticmethod
    def default_db(ef, host, port):
        """
        Creates the default database (`ChromaDb`).

        :param ef: Embedding function to use in database.
        :param host: Optional. Hostname for the database server.
        :param port: Optional. Port for the database server.
        :returns: Default database
        :raises ValueError: If `ef` is None; BaseAppConfig knows no default
        embedding function.
        """
        if ef is None:
            raise ValueError("ChromaDb cannot be instantiated without an embedding function")

        # Imported at call time rather than at module import time.
        from embedchain.vectordb.chroma_db import ChromaDB

        return ChromaDB(ef=ef, host=host, port=port)

    def _setup_logging(self, debug_level):
        """
        Configures the root logger and attaches a module logger to the instance.

        :param debug_level: Optional. (String) Name of a logging level, case
        insensitive. Falls back to WARNING when None.
        :raises ValueError: If `debug_level` does not name an integer level of
        the `logging` module.
        """
        if debug_level is None:
            level = logging.WARNING  # Default level
        else:
            level = getattr(logging, debug_level.upper(), None)
            if not isinstance(level, int):
                raise ValueError(f"Invalid log level: {debug_level}")

        logging.basicConfig(format="%(asctime)s [%(name)s] [%(levelname)s] %(message)s", level=level)
        self.logger = logging.getLogger(__name__)

View File

@@ -0,0 +1,19 @@
from .BaseAppConfig import BaseAppConfig
class CustomAppConfig(BaseAppConfig):
    """
    Config to initialize an embedchain custom `App` instance, with extra config options.
    """

    def __init__(self, log_level=None, ef=None, db=None, host=None, port=None, id=None):
        """
        :param log_level: Optional. (String) Debug level
        ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'].
        :param ef: Embedding function to use. Required when no `db` is given,
        because the default database cannot be built without one.
        :param db: Optional. (Vector) database to use for embeddings.
        :param host: Optional. Hostname for the database server.
        :param port: Optional. Port for the database server.
        :param id: Optional. ID of the app. Document metadata will have this id.
        :raises ValueError: If `log_level` is invalid, or if neither `db` nor
        `ef` is provided.
        """
        # `ef` must be forwarded: the parent uses it to build the default
        # database when `db` is not supplied. Dropping it made that path raise
        # a ValueError even though the caller passed an embedding function.
        super().__init__(log_level=log_level, ef=ef, db=db, host=host, port=port, id=id)

View File

@@ -0,0 +1,30 @@
from chromadb.utils import embedding_functions
from .BaseAppConfig import BaseAppConfig
class OpenSourceAppConfig(BaseAppConfig):
    """
    Config to initialize an embedchain `OpenSourceApp` instance.

    The embedding function is not configurable here; it is always the one
    returned by `default_embedding_function`.
    """

    def __init__(self, log_level=None, host=None, port=None, id=None):
        """
        :param log_level: Optional. (String) Debug level
        ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'].
        :param host: Optional. Hostname for the database server.
        :param port: Optional. Port for the database server.
        :param id: Optional. ID of the app. Document metadata will have this id.
        """
        embedding_fn = OpenSourceAppConfig.default_embedding_function()
        super().__init__(log_level=log_level, ef=embedding_fn, host=host, port=port, id=id)

    @staticmethod
    def default_embedding_function():
        """
        Builds the default embedding function (`all-MiniLM-L6-v2`).

        :returns: The default embedding function
        """
        sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(
            model_name="all-MiniLM-L6-v2"
        )
        return sentence_transformer_ef

View File