feat: One App (#635)

Co-authored-by: Taranjeet Singh <reachtotj@gmail.com>
Author: cachho
Date: 2023-09-30 08:30:43 +02:00
Committed by: GitHub
Parent: 2db07cdb1f
Commit: 9ecf2e9feb
7 changed files with 359 additions and 178 deletions


@@ -1,15 +1,14 @@
import logging
from typing import Optional
from embedchain.apps.custom_app import CustomApp
from embedchain.apps.app import App
from embedchain.config import CustomAppConfig
from embedchain.embedder.openai import OpenAIEmbedder
from embedchain.helper.json_serializable import register_deserializable
from embedchain.llm.llama2 import Llama2Llm
from embedchain.vectordb.chroma import ChromaDB
@register_deserializable
class Llama2App(CustomApp):
class Llama2App(App):
"""
The EmbedChain Llama2App class.
@@ -17,17 +16,23 @@ class Llama2App(CustomApp):
add(source, data_type): adds the data from the given URL to the vector db.
query(query): finds answer to the given query using vector database and LLM.
chat(query): finds answer to the given query using vector database and LLM, with conversation history.
.. deprecated:: 0.0.59
Use `App` instead.
"""
def __init__(self, config: CustomAppConfig = None, system_prompt: Optional[str] = None):
"""
.. deprecated:: 0.0.59
Use `App` instead.
:param config: CustomAppConfig instance to load as configuration. Optional.
:param system_prompt: System prompt string. Optional.
"""
if config is None:
config = CustomAppConfig()
super().__init__(
config=config, llm=Llama2Llm(), db=ChromaDB(), embedder=OpenAIEmbedder(), system_prompt=system_prompt
logging.warning(
"DEPRECATION WARNING: Please use `App` instead of `Llama2App`. "
"`Llama2App` will be removed in a future release. "
"Please refer to https://docs.embedchain.ai/advanced/app_types#llama2app for instructions."
)
super().__init__(config=config, llm=Llama2Llm(), system_prompt=system_prompt)
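The net effect of this file's change: `Llama2App` is now a thin, deprecated subclass of `App` that only supplies a `Llama2Llm` and logs a warning, so the non-deprecated equivalent is to compose `App` directly. A minimal sketch under that reading; the source URL and query are illustrative, and any credentials `Llama2Llm` needs (e.g. a Replicate token) are outside this diff:

from embedchain.apps.app import App
from embedchain.llm.llama2 import Llama2Llm

# Equivalent to Llama2App() after this commit, without the deprecation warning:
# App supplies the default embedder and vector database on its own.
app = App(llm=Llama2Llm())
app.add("https://example.com/article")      # illustrative source
print(app.query("Summarize the article."))  # illustrative query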


@@ -1,11 +1,16 @@
import logging
from typing import Optional
from embedchain.config import (AppConfig, BaseEmbedderConfig, BaseLlmConfig,
ChromaDbConfig)
from embedchain.config.vectordb.base import BaseVectorDbConfig
from embedchain.embedchain import EmbedChain
from embedchain.embedder.base import BaseEmbedder
from embedchain.embedder.openai import OpenAIEmbedder
from embedchain.helper.json_serializable import register_deserializable
from embedchain.llm.base import BaseLlm
from embedchain.llm.openai import OpenAILlm
from embedchain.vectordb.base import BaseVectorDB
from embedchain.vectordb.chroma import ChromaDB
@@ -23,32 +28,98 @@ class App(EmbedChain):
def __init__(
self,
config: AppConfig = None,
llm_config: BaseLlmConfig = None,
config: Optional[AppConfig] = None,
llm: BaseLlm = None,
llm_config: Optional[BaseLlmConfig] = None,
db: BaseVectorDB = None,
db_config: Optional[BaseVectorDbConfig] = None,
embedder: BaseEmbedder = None,
embedder_config: Optional[BaseEmbedderConfig] = None,
chromadb_config: Optional[ChromaDbConfig] = None,
system_prompt: Optional[str] = None,
):
"""
Initialize a new `CustomApp` instance. You only have a few choices to make.
Initialize a new `App` instance.
:param config: Config for the app instance.
This is the most basic configuration, that does not fall into the LLM, database or embedder category,
defaults to None
:type config: AppConfig, optional
:param config: Config for the app instance., defaults to None
:type config: Optional[AppConfig], optional
:param llm: LLM class instance. example: `from embedchain.llm.openai import OpenAILlm`, defaults to OpenAILlm
:type llm: BaseLlm, optional
:param llm_config: Allows you to configure the LLM, e.g. how many documents to return,
example: `from embedchain.config import LlmConfig`, defaults to None
:type llm_config: BaseLlmConfig, optional
:param chromadb_config: Allows you to configure the vector database,
:type llm_config: Optional[BaseLlmConfig], optional
:param db: The database to use for storing and retrieving embeddings,
example: `from embedchain.vectordb.chroma import ChromaDB`, defaults to ChromaDB
:type db: BaseVectorDB, optional
:param db_config: Allows you to configure the vector database,
example: `from embedchain.config import ChromaDbConfig`, defaults to None
:type db_config: Optional[BaseVectorDbConfig], optional
:param embedder: The embedder (embedding model and function) used to calculate embeddings.
example: `from embedchain.embedder.gpt4all import GPT4AllEmbedder`, defaults to OpenAIEmbedder
:type embedder: BaseEmbedder, optional
:param embedder_config: Allows you to configure the Embedder.
example: `from embedchain.config import BaseEmbedderConfig`, defaults to None
:type embedder_config: Optional[BaseEmbedderConfig], optional
:param chromadb_config: Deprecated alias of `db_config`, defaults to None
:type chromadb_config: Optional[ChromaDbConfig], optional
:param system_prompt: System prompt that will be provided to the LLM as such, defaults to None
:type system_prompt: Optional[str], optional
:raises TypeError: LLM, database, embedder, or one of their configs is not a valid class instance.
"""
# Overwrite deprecated arguments
if chromadb_config:
logging.warning(
"DEPRECATION WARNING: Please use `db_config` argument instead of `chromadb_config`."
"`chromadb_config` will be removed in a future release."
)
db_config = chromadb_config
# Type check configs
if config and not isinstance(config, AppConfig):
raise TypeError(
"Config is not a `AppConfig` instance. "
"Please make sure the type is right and that you are passing an instance."
)
if llm_config and not isinstance(llm_config, BaseLlmConfig):
raise TypeError(
"`llm_config` is not a `BaseLlmConfig` instance. "
"Please make sure the type is right and that you are passing an instance."
)
if db_config and not isinstance(db_config, BaseVectorDbConfig):
raise TypeError(
"`db_config` is not a `BaseVectorDbConfig` instance. "
"Please make sure the type is right and that you are passing an instance."
)
if embedder_config and not isinstance(embedder_config, BaseEmbedderConfig):
raise TypeError(
"`embedder_config` is not a `BaseEmbedderConfig` instance. "
"Please make sure the type is right and that you are passing an instance."
)
# Assign defaults
if config is None:
config = AppConfig()
if llm is None:
llm = OpenAILlm(config=llm_config)
if db is None:
db = ChromaDB(config=db_config)
if embedder is None:
embedder = OpenAIEmbedder(config=embedder_config)
llm = OpenAILlm(config=llm_config)
embedder = OpenAIEmbedder(config=BaseEmbedderConfig(model="text-embedding-ada-002"))
database = ChromaDB(config=chromadb_config)
super().__init__(config, llm, db=database, embedder=embedder, system_prompt=system_prompt)
# Type check assignments
if not isinstance(llm, BaseLlm):
raise TypeError(
"LLM is not a `BaseLlm` instance. "
"Please make sure the type is right and that you are passing an instance."
)
if not isinstance(db, BaseVectorDB):
raise TypeError(
"Database is not a `BaseVectorDB` instance. "
"Please make sure the type is right and that you are passing an instance."
)
if not isinstance(embedder, BaseEmbedder):
raise TypeError(
"Embedder is not a `BaseEmbedder` instance. "
"Please make sure the type is right and that you are passing an instance."
)
super().__init__(config, llm=llm, db=db, embedder=embedder, system_prompt=system_prompt)
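Taken together, the rewritten `App.__init__` accepts either ready-made components (`llm`, `db`, `embedder`) or just their configs (`llm_config`, `db_config`, `embedder_config`), type-checks both, and falls back to `OpenAILlm`, `ChromaDB` and `OpenAIEmbedder`. A minimal usage sketch; the `number_documents` option on `BaseLlmConfig` is assumed for illustration and is not introduced by this diff:

from embedchain.apps.app import App
from embedchain.config import AppConfig, BaseLlmConfig
from embedchain.embedder.openai import OpenAIEmbedder
from embedchain.llm.openai import OpenAILlm
from embedchain.vectordb.chroma import ChromaDB

# Config-only style: App builds OpenAILlm, ChromaDB and OpenAIEmbedder itself.
app = App(llm_config=BaseLlmConfig(number_documents=3))  # number_documents is an assumed option

# Component style: pass instances explicitly, as CustomApp used to require.
app = App(
    config=AppConfig(),
    llm=OpenAILlm(),
    db=ChromaDB(),
    embedder=OpenAIEmbedder(),
)

Passing `chromadb_config=` still works, but it is mapped onto `db_config` and triggers the deprecation warning shown above.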


@@ -1,7 +1,8 @@
import logging
from typing import Optional
from embedchain.apps.app import App
from embedchain.config import CustomAppConfig
from embedchain.embedchain import EmbedChain
from embedchain.embedder.base import BaseEmbedder
from embedchain.helper.json_serializable import register_deserializable
from embedchain.llm.base import BaseLlm
@@ -9,7 +10,7 @@ from embedchain.vectordb.base import BaseVectorDB
@register_deserializable
class CustomApp(EmbedChain):
class CustomApp(App):
"""
Embedchain's custom app allows for the most flexibility.
@@ -19,6 +20,9 @@ class CustomApp(EmbedChain):
add(source, data_type): adds the data from the given URL to the vector db.
query(query): finds answer to the given query using vector database and LLM.
chat(query): finds answer to the given query using vector database and LLM, with conversation history.
.. deprecated:: 0.0.59
Use `App` instead.
"""
def __init__(
@@ -32,6 +36,9 @@ class CustomApp(EmbedChain):
"""
Initialize a new `CustomApp` instance. You have to choose a LLM, database and embedder.
.. deprecated:: 0.0.59
Use `App` instead.
:param config: Config for the app instance. This is the most basic configuration,
that does not fall into the LLM, database or embedder category, defaults to None
:type config: Optional[CustomAppConfig], optional
@@ -48,36 +55,9 @@ class CustomApp(EmbedChain):
:raises ValueError: LLM, database or embedder has not been defined.
:raises TypeError: LLM, database or embedder is not a valid class instance.
"""
# Config is not required, it has a default
if config is None:
config = CustomAppConfig()
if llm is None:
raise ValueError("LLM must be provided for custom app. Please import from `embedchain.llm`.")
if db is None:
raise ValueError("Database must be provided for custom app. Please import from `embedchain.vectordb`.")
if embedder is None:
raise ValueError("Embedder must be provided for custom app. Please import from `embedchain.embedder`.")
if not isinstance(config, CustomAppConfig):
raise TypeError(
"Config is not a `CustomAppConfig` instance. "
"Please make sure the type is right and that you are passing an instance."
)
if not isinstance(llm, BaseLlm):
raise TypeError(
"LLM is not a `BaseLlm` instance. "
"Please make sure the type is right and that you are passing an instance."
)
if not isinstance(db, BaseVectorDB):
raise TypeError(
"Database is not a `BaseVectorDB` instance. "
"Please make sure the type is right and that you are passing an instance."
)
if not isinstance(embedder, BaseEmbedder):
raise TypeError(
"Embedder is not a `BaseEmbedder` instance. "
"Please make sure the type is right and that you are passing an instance."
)
logging.warning(
"DEPRECATION WARNING: Please use `App` instead of `CustomApp`. "
"`CustomApp` will be removed in a future release. "
"Please refer to https://docs.embedchain.ai/advanced/app_types#opensourceapp for instructions."
)
super().__init__(config=config, llm=llm, db=db, embedder=embedder, system_prompt=system_prompt)
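For existing `CustomApp` users, the migration implied by this file is essentially a rename: the same explicit composition now goes through `App`, where each component becomes optional. A brief sketch, reusing the OpenAI and Chroma components shown elsewhere in this commit:

from embedchain.apps.app import App
from embedchain.embedder.openai import OpenAIEmbedder
from embedchain.llm.openai import OpenAILlm
from embedchain.vectordb.chroma import ChromaDB

# Before: CustomApp(llm=..., db=..., embedder=...) with all three mandatory.
# After: the same call on App works, and any argument may be dropped because
# App falls back to OpenAILlm, ChromaDB and OpenAIEmbedder.
app = App(llm=OpenAILlm(), db=ChromaDB(), embedder=OpenAIEmbedder())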


@@ -1,9 +1,9 @@
import logging
from typing import Optional
from embedchain.config import (BaseEmbedderConfig, BaseLlmConfig,
ChromaDbConfig, OpenSourceAppConfig)
from embedchain.embedchain import EmbedChain
from embedchain.apps.app import App
from embedchain.config import (BaseLlmConfig, ChromaDbConfig,
OpenSourceAppConfig)
from embedchain.embedder.gpt4all import GPT4AllEmbedder
from embedchain.helper.json_serializable import register_deserializable
from embedchain.llm.gpt4all import GPT4ALLLlm
@@ -13,7 +13,7 @@ gpt4all_model = None
@register_deserializable
class OpenSourceApp(EmbedChain):
class OpenSourceApp(App):
"""
The embedchain Open Source App.
Comes preconfigured with the best open source LLM, embedding model, database.
@@ -22,6 +22,9 @@ class OpenSourceApp(EmbedChain):
add(source, data_type): adds the data from the given URL to the vector db.
query(query): finds answer to the given query using vector database and LLM.
chat(query): finds answer to the given query using vector database and LLM, with conversation history.
.. deprecated:: 0.0.59
Use `App` instead.
"""
def __init__(
@@ -36,6 +39,9 @@ class OpenSourceApp(EmbedChain):
Since it's opinionated you don't have to choose a LLM, database and embedder.
However, you can configure those.
.. deprecated:: 0.0.59
Use `App` instead.
:param config: Config for the app instance. This is the most basic configuration,
that does not fall into the LLM, database or embedder category, defaults to None
:type config: OpenSourceAppConfig, optional
@@ -50,29 +56,16 @@ class OpenSourceApp(EmbedChain):
:type system_prompt: Optional[str], optional
:raises TypeError: `OpenSourceAppConfig` or `LlmConfig` invalid.
"""
logging.info("Loading open source embedding model. This may take some time...") # noqa:E501
if not config:
config = OpenSourceAppConfig()
logging.warning(
"DEPRECATION WARNING: Please use `App` instead of `OpenSourceApp`."
"`OpenSourceApp` will be removed in a future release."
"Please refer to https://docs.embedchain.ai/advanced/app_types#customapp for instructions."
)
if not isinstance(config, OpenSourceAppConfig):
raise TypeError(
"OpenSourceApp needs a OpenSourceAppConfig passed to it. "
"You can import it with `from embedchain.config import OpenSourceAppConfig`"
)
if not llm_config:
llm_config = BaseLlmConfig(model="orca-mini-3b.ggmlv3.q4_0.bin")
elif not isinstance(llm_config, BaseLlmConfig):
raise TypeError(
"The LlmConfig passed to OpenSourceApp is invalid. "
"You can import it with `from embedchain.config import LlmConfig`"
)
elif not llm_config.model:
llm_config.model = "orca-mini-3b.ggmlv3.q4_0.bin"
llm = GPT4ALLLlm(config=llm_config)
embedder = GPT4AllEmbedder(config=BaseEmbedderConfig(model="all-MiniLM-L6-v2"))
logging.error("Successfully loaded open source embedding model.")
database = ChromaDB(config=chromadb_config)
super().__init__(config, llm=llm, db=database, embedder=embedder, system_prompt=system_prompt)
super().__init__(
config=config,
llm=GPT4ALLLlm(config=llm_config),
db=ChromaDB(config=chromadb_config),
embedder=GPT4AllEmbedder(),
system_prompt=system_prompt,
)
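Likewise, `OpenSourceApp` now only forwards GPT4All components to `App`, so the non-deprecated equivalent is to pass them yourself. A minimal sketch that reuses the default model name from the diff; downloading the local GPT4All model is implied but outside the scope of this commit:

from embedchain.apps.app import App
from embedchain.config import BaseLlmConfig
from embedchain.embedder.gpt4all import GPT4AllEmbedder
from embedchain.llm.gpt4all import GPT4ALLLlm

# Equivalent to OpenSourceApp() after this commit, without the deprecation warning.
llm = GPT4ALLLlm(config=BaseLlmConfig(model="orca-mini-3b.ggmlv3.q4_0.bin"))
app = App(llm=llm, embedder=GPT4AllEmbedder())  # ChromaDB stays the default db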