feat: add new custom app (#313)

This commit is contained in:
cachho
2023-07-18 21:24:23 +02:00
committed by GitHub
parent 96143ac496
commit adb7206639
24 changed files with 455 additions and 147 deletions

View File

@@ -104,7 +104,7 @@ class QueryConfig(BaseConfig):
self.temperature = temperature if temperature else 0
self.max_tokens = max_tokens if max_tokens else 1000
self.model = model if model else "gpt-3.5-turbo-0613"
self.model = model
self.top_p = top_p if top_p else 1
if self.validate_template(template):

View File

@@ -18,7 +18,9 @@ class AppConfig(BaseAppConfig):
:param host: Optional. Hostname for the database server.
:param port: Optional. Port for the database server.
"""
super().__init__(log_level=log_level, ef=AppConfig.default_embedding_function(), host=host, port=port, id=id)
super().__init__(
log_level=log_level, embedding_fn=AppConfig.default_embedding_function(), host=host, port=port, id=id
)
@staticmethod
def default_embedding_function():

View File

@@ -8,11 +8,11 @@ class BaseAppConfig(BaseConfig):
Parent config to initialize an instance of `App`, `OpenSourceApp` or `CustomApp`.
"""
def __init__(self, log_level=None, ef=None, db=None, host=None, port=None, id=None):
def __init__(self, log_level=None, embedding_fn=None, db=None, host=None, port=None, id=None):
"""
:param log_level: Optional. (String) Debug level
['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'].
:param ef: Embedding function to use.
:param embedding_fn: Embedding function to use.
:param db: Optional. (Vector) database instance to use for embeddings.
:param id: Optional. ID of the app. Document metadata will have this id.
:param host: Optional. Hostname for the database server.
@@ -20,26 +20,26 @@ class BaseAppConfig(BaseConfig):
"""
self._setup_logging(log_level)
self.db = db if db else BaseAppConfig.default_db(ef=ef, host=host, port=port)
self.db = db if db else BaseAppConfig.default_db(embedding_fn=embedding_fn, host=host, port=port)
self.id = id
return
@staticmethod
def default_db(ef, host, port):
def default_db(embedding_fn, host, port):
"""
Sets database to default (`ChromaDb`).
:param ef: Embedding function to use in database.
:param embedding_fn: Embedding function to use in database.
:param host: Optional. Hostname for the database server.
:param port: Optional. Port for the database server.
:returns: Default database
:raises ValueError: BaseAppConfig knows no default embedding function.
"""
if ef is None:
if embedding_fn is None:
raise ValueError("ChromaDb cannot be instantiated without an embedding function")
from embedchain.vectordb.chroma_db import ChromaDB
return ChromaDB(ef=ef, host=host, port=port)
return ChromaDB(embedding_fn=embedding_fn, host=host, port=port)
def _setup_logging(self, debug_level):
level = logging.WARNING # Default level

View File

@@ -1,4 +1,15 @@
import logging
from typing import Any
from chromadb.api.types import Documents, Embeddings
from dotenv import load_dotenv
from embedchain.models import EmbeddingFunctions, Providers
from .BaseAppConfig import BaseAppConfig
from embedchain.models import Providers
load_dotenv()
class CustomAppConfig(BaseAppConfig):
@@ -6,14 +17,88 @@ class CustomAppConfig(BaseAppConfig):
Config to initialize an embedchain custom `App` instance, with extra config options.
"""
def __init__(self, log_level=None, ef=None, db=None, host=None, port=None, id=None):
def __init__(
self,
log_level=None,
embedding_fn: EmbeddingFunctions = None,
embedding_fn_model=None,
db=None,
host=None,
port=None,
id=None,
provider: Providers = None,
model=None,
open_source_app_config=None,
):
"""
:param log_level: Optional. (String) Debug level
['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'].
:param ef: Optional. Embedding function to use.
:param embedding_fn: Optional. Embedding function to use.
:param embedding_fn_model: Optional. Model name to use for embedding function.
:param db: Optional. (Vector) database to use for embeddings.
:param id: Optional. ID of the app. Document metadata will have this id.
:param host: Optional. Hostname for the database server.
:param port: Optional. Port for the database server.
:param provider: Optional. (Providers): LLM Provider to use.
:param open_source_app_config: Optional. Config instance needed for open source apps.
"""
super().__init__(log_level=log_level, db=db, host=host, port=port, id=id)
if provider:
self.provider = provider
else:
raise ValueError("CustomApp must have a provider assigned.")
self.open_source_app_config = open_source_app_config
super().__init__(
log_level=log_level,
embedding_fn=CustomAppConfig.embedding_function(embedding_function=embedding_fn, model=embedding_fn_model),
db=db,
host=host,
port=port,
id=id,
)
@staticmethod
def langchain_default_concept(embeddings: Any):
"""
Langchains default function layout for embeddings.
"""
def embed_function(texts: Documents) -> Embeddings:
return embeddings.embed_documents(texts)
return embed_function
@staticmethod
def embedding_function(embedding_function: EmbeddingFunctions, model: str = None):
    """
    Build the embedding callable for the selected embedding backend.

    :param embedding_function: Member of `EmbeddingFunctions` selecting the backend.
    :param model: Optional. Model name for the backend. When omitted, the backend
    class is constructed without a model argument, leaving the choice to the backend.
    :returns: Callable that embeds texts. Returns None for an `EmbeddingFunctions`
    member that none of the branches below handle.
    :raises ValueError: `embedding_function` is not an `EmbeddingFunctions` member.
    """
    if not isinstance(embedding_function, EmbeddingFunctions):
        raise ValueError(
            f"Invalid option: '{embedding_function}'. Expecting one of the following options: {[option.value for option in EmbeddingFunctions]}"  # noqa: E501
        )

    # Forward the model name only when one was given. Passing `model_name=None`
    # explicitly would override the backend's own default instead of using it.
    model_kwargs = {"model_name": model} if model else {}

    if embedding_function == EmbeddingFunctions.OPENAI:
        from langchain.embeddings import OpenAIEmbeddings

        # OpenAIEmbeddings takes the name as `model`, not `model_name`.
        embeddings = OpenAIEmbeddings(model=model) if model else OpenAIEmbeddings()
        return CustomAppConfig.langchain_default_concept(embeddings)

    elif embedding_function == EmbeddingFunctions.HUGGING_FACE:
        from langchain.embeddings import HuggingFaceEmbeddings

        embeddings = HuggingFaceEmbeddings(**model_kwargs)
        return CustomAppConfig.langchain_default_concept(embeddings)

    elif embedding_function == EmbeddingFunctions.VERTEX_AI:
        from langchain.embeddings import VertexAIEmbeddings

        embeddings = VertexAIEmbeddings(**model_kwargs)
        return CustomAppConfig.langchain_default_concept(embeddings)

    elif embedding_function == EmbeddingFunctions.GPT4ALL:
        # Note: We could use LangChain's GPT4ALL embedding, but it's not available in all versions.
        from chromadb.utils import embedding_functions

        return embedding_functions.SentenceTransformerEmbeddingFunction(**model_kwargs)

View File

@@ -8,16 +8,23 @@ class OpenSourceAppConfig(BaseAppConfig):
Config to initialize an embedchain custom `OpenSourceApp` instance, with extra config options.
"""
def __init__(self, log_level=None, host=None, port=None, id=None):
def __init__(self, log_level=None, host=None, port=None, id=None, model=None):
"""
:param log_level: Optional. (String) Debug level
['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'].
:param id: Optional. ID of the app. Document metadata will have this id.
:param host: Optional. Hostname for the database server.
:param port: Optional. Port for the database server.
:param model: Optional. GPT4ALL uses the model to instantiate the class. So unlike `App`, it has to be provided before querying.
"""
self.model = model or "orca-mini-3b.ggmlv3.q4_0.bin"
super().__init__(
log_level=log_level, ef=OpenSourceAppConfig.default_embedding_function(), host=host, port=port, id=id
log_level=log_level,
embedding_fn=OpenSourceAppConfig.default_embedding_function(),
host=host,
port=port,
id=id,
)
@staticmethod