diff --git a/embedchain/__init__.py b/embedchain/__init__.py index a0eb7de4..f681ee7f 100644 --- a/embedchain/__init__.py +++ b/embedchain/__init__.py @@ -3,9 +3,4 @@ import importlib.metadata __version__ = importlib.metadata.version(__package__ or __name__) from embedchain.apps.app import App # noqa: F401 -from embedchain.apps.custom_app import CustomApp # noqa: F401 -from embedchain.apps.Llama2App import Llama2App # noqa: F401 -from embedchain.apps.open_source_app import OpenSourceApp # noqa: F401 -from embedchain.apps.person_app import (PersonApp, # noqa: F401 - PersonOpenSourceApp) from embedchain.vectordb.chroma import ChromaDB # noqa: F401 diff --git a/embedchain/apps/Llama2App.py b/embedchain/apps/Llama2App.py deleted file mode 100644 index b8c1259c..00000000 --- a/embedchain/apps/Llama2App.py +++ /dev/null @@ -1,38 +0,0 @@ -import logging -from typing import Optional - -from embedchain.apps.app import App -from embedchain.config import CustomAppConfig -from embedchain.helper.json_serializable import register_deserializable -from embedchain.llm.llama2 import Llama2Llm - - -@register_deserializable -class Llama2App(App): - """ - The EmbedChain Llama2App class. - - Methods: - add(source, data_type): adds the data from the given URL to the vector db. - query(query): finds answer to the given query using vector database and LLM. - chat(query): finds answer to the given query using vector database and LLM, with conversation history. - - .. deprecated:: 0.0.64 - Use `App` instead. - """ - - def __init__(self, config: CustomAppConfig = None, system_prompt: Optional[str] = None): - """ - .. deprecated:: 0.0.64 - Use `App` instead. - - :param config: CustomAppConfig instance to load as configuration. Optional. - :param system_prompt: System prompt string. Optional. - """ - logging.warning( - "DEPRECATION WARNING: Please use `App` instead of `Llama2App`. " - "`Llama2App` will be removed in a future release. " - "Please refer to https://docs.embedchain.ai/advanced/app_types#llama2app for instructions." - ) - - super().__init__(config=config, llm=Llama2Llm(), system_prompt=system_prompt) diff --git a/embedchain/apps/custom_app.py b/embedchain/apps/custom_app.py deleted file mode 100644 index 8c4e4695..00000000 --- a/embedchain/apps/custom_app.py +++ /dev/null @@ -1,63 +0,0 @@ -import logging -from typing import Optional - -from embedchain.apps.app import App -from embedchain.config import CustomAppConfig -from embedchain.embedder.base import BaseEmbedder -from embedchain.helper.json_serializable import register_deserializable -from embedchain.llm.base import BaseLlm -from embedchain.vectordb.base import BaseVectorDB - - -@register_deserializable -class CustomApp(App): - """ - Embedchain's custom app allows for most flexibility. - - You can craft your own mix of various LLMs, vector databases and embedding model/functions. - - Methods: - add(source, data_type): adds the data from the given URL to the vector db. - query(query): finds answer to the given query using vector database and LLM. - chat(query): finds answer to the given query using vector database and LLM, with conversation history. - - .. deprecated:: 0.0.64 - Use `App` instead. - """ - - def __init__( - self, - config: Optional[CustomAppConfig] = None, - llm: BaseLlm = None, - db: BaseVectorDB = None, - embedder: BaseEmbedder = None, - system_prompt: Optional[str] = None, - ): - """ - Initialize a new `CustomApp` instance. You have to choose a LLM, database and embedder. - - .. deprecated:: 0.0.64 - Use `App` instead. - - :param config: Config for the app instance. This is the most basic configuration, - that does not fall into the LLM, database or embedder category, defaults to None - :type config: Optional[CustomAppConfig], optional - :param llm: LLM Class instance. example: `from embedchain.llm.openai import OpenAILlm`, defaults to None - :type llm: BaseLlm - :param db: The database to use for storing and retrieving embeddings, - example: `from embedchain.vectordb.chroma_db import ChromaDb`, defaults to None - :type db: BaseVectorDB - :param embedder: The embedder (embedding model and function) use to calculate embeddings. - example: `from embedchain.embedder.gpt4all_embedder import GPT4AllEmbedder`, defaults to None - :type embedder: BaseEmbedder - :param system_prompt: System prompt that will be provided to the LLM as such, defaults to None - :type system_prompt: Optional[str], optional - :raises ValueError: LLM, database or embedder has not been defined. - :raises TypeError: LLM, database or embedder is not a valid class instance. - """ - logging.warning( - "DEPRECATION WARNING: Please use `App` instead of `CustomApp`. " - "`CustomApp` will be removed in a future release. " - "Please refer to https://docs.embedchain.ai/advanced/app_types#opensourceapp for instructions." - ) - super().__init__(config=config, llm=llm, db=db, embedder=embedder, system_prompt=system_prompt) diff --git a/embedchain/apps/open_source_app.py b/embedchain/apps/open_source_app.py deleted file mode 100644 index de32d59e..00000000 --- a/embedchain/apps/open_source_app.py +++ /dev/null @@ -1,71 +0,0 @@ -import logging -from typing import Optional - -from embedchain.apps.app import App -from embedchain.config import (BaseLlmConfig, ChromaDbConfig, - OpenSourceAppConfig) -from embedchain.embedder.gpt4all import GPT4AllEmbedder -from embedchain.helper.json_serializable import register_deserializable -from embedchain.llm.gpt4all import GPT4ALLLlm -from embedchain.vectordb.chroma import ChromaDB - -gpt4all_model = None - - -@register_deserializable -class OpenSourceApp(App): - """ - The embedchain Open Source App. - Comes preconfigured with the best open source LLM, embedding model, database. - - Methods: - add(source, data_type): adds the data from the given URL to the vector db. - query(query): finds answer to the given query using vector database and LLM. - chat(query): finds answer to the given query using vector database and LLM, with conversation history. - - .. deprecated:: 0.0.64 - Use `App` instead. - """ - - def __init__( - self, - config: OpenSourceAppConfig = None, - llm_config: BaseLlmConfig = None, - chromadb_config: Optional[ChromaDbConfig] = None, - system_prompt: Optional[str] = None, - ): - """ - Initialize a new `CustomApp` instance. - Since it's opinionated you don't have to choose a LLM, database and embedder. - However, you can configure those. - - .. deprecated:: 0.0.64 - Use `App` instead. - - :param config: Config for the app instance. This is the most basic configuration, - that does not fall into the LLM, database or embedder category, defaults to None - :type config: OpenSourceAppConfig, optional - :param llm_config: Allows you to configure the LLM, e.g. how many documents to return. - example: `from embedchain.config import BaseLlmConfig`, defaults to None - :type llm_config: BaseLlmConfig, optional - :param chromadb_config: Allows you to configure the open source database, - example: `from embedchain.config import ChromaDbConfig`, defaults to None - :type chromadb_config: Optional[ChromaDbConfig], optional - :param system_prompt: System prompt that will be provided to the LLM as such. - Please don't use for the time being, as it's not supported., defaults to None - :type system_prompt: Optional[str], optional - :raises TypeError: `OpenSourceAppConfig` or `BaseLlmConfig` invalid. - """ - logging.warning( - "DEPRECATION WARNING: Please use `App` instead of `OpenSourceApp`." - "`OpenSourceApp` will be removed in a future release." - "Please refer to https://docs.embedchain.ai/advanced/app_types#customapp for instructions." - ) - - super().__init__( - config=config, - llm=GPT4ALLLlm(config=llm_config), - db=ChromaDB(config=chromadb_config), - embedder=GPT4AllEmbedder(), - system_prompt=system_prompt, - ) diff --git a/embedchain/apps/person_app.py b/embedchain/apps/person_app.py deleted file mode 100644 index 9d9e07ba..00000000 --- a/embedchain/apps/person_app.py +++ /dev/null @@ -1,93 +0,0 @@ -from string import Template - -from embedchain.apps.app import App -from embedchain.apps.open_source_app import OpenSourceApp -from embedchain.config import AppConfig, BaseLlmConfig -from embedchain.config.llm.base import (DEFAULT_PROMPT, - DEFAULT_PROMPT_WITH_HISTORY) -from embedchain.helper.json_serializable import register_deserializable - - -@register_deserializable -class EmbedChainPersonApp: - """ - Base class to create a person bot. - This bot behaves and speaks like a person. - - :param person: name of the person, better if its a well known person. - :param config: AppConfig instance to load as configuration. - """ - - def __init__(self, person: str, config: AppConfig = None): - """Initialize a new person app - - :param person: Name of the person that's imitated. - :type person: str - :param config: Configuration class instance, defaults to None - :type config: AppConfig, optional - """ - self.person = person - self.person_prompt = f"You are {person}. Whatever you say, you will always say in {person} style." # noqa:E501 - super().__init__(config) - - def add_person_template_to_config(self, default_prompt: str, config: BaseLlmConfig = None): - """ - This method checks if the config object contains a prompt template - if yes it adds the person prompt to it and return the updated config - else it creates a config object with the default prompt added to the person prompt - - :param default_prompt: it is the default prompt for query or chat methods - :type default_prompt: str - :param config: _description_, defaults to None - :type config: BaseLlmConfig, optional - :return: The `ChatConfig` instance to use as configuration options. - :rtype: _type_ - """ - template = Template(self.person_prompt + " " + default_prompt) - - if config: - if config.template: - # Add person prompt to custom user template - config.template = Template(self.person_prompt + " " + config.template.template) - else: - # If no user template is present, use person prompt with the default template - config.template = template - else: - # if no config is present at all, initialize the config with person prompt and default template - config = BaseLlmConfig( - template=template, - ) - - return config - - -@register_deserializable -class PersonApp(EmbedChainPersonApp, App): - """ - The Person app. - Extends functionality from EmbedChainPersonApp and App - """ - - def query(self, input_query, config: BaseLlmConfig = None, dry_run=False): - config = self.add_person_template_to_config(DEFAULT_PROMPT, config) - return super().query(input_query, config, dry_run, where=None) - - def chat(self, input_query, config: BaseLlmConfig = None, dry_run=False, where=None): - config = self.add_person_template_to_config(DEFAULT_PROMPT_WITH_HISTORY, config) - return super().chat(input_query, config, dry_run, where) - - -@register_deserializable -class PersonOpenSourceApp(EmbedChainPersonApp, OpenSourceApp): - """ - The Person app. - Extends functionality from EmbedChainPersonApp and OpenSourceApp - """ - - def query(self, input_query, config: BaseLlmConfig = None, dry_run=False): - config = self.add_person_template_to_config(DEFAULT_PROMPT, config) - return super().query(input_query, config, dry_run) - - def chat(self, input_query, config: BaseLlmConfig = None, dry_run=False): - config = self.add_person_template_to_config(DEFAULT_PROMPT_WITH_HISTORY, config) - return super().chat(input_query, config, dry_run) diff --git a/embedchain/config/__init__.py b/embedchain/config/__init__.py index c5b3a247..fb6349a3 100644 --- a/embedchain/config/__init__.py +++ b/embedchain/config/__init__.py @@ -2,8 +2,6 @@ from .add_config import AddConfig, ChunkerConfig from .apps.app_config import AppConfig -from .apps.custom_app_config import CustomAppConfig -from .apps.open_source_app_config import OpenSourceAppConfig from .base_config import BaseConfig from .embedder.base import BaseEmbedderConfig from .embedder.base import BaseEmbedderConfig as EmbedderConfig diff --git a/embedchain/config/apps/base_app_config.py b/embedchain/config/apps/base_app_config.py index 9c25b227..d12b1c63 100644 --- a/embedchain/config/apps/base_app_config.py +++ b/embedchain/config/apps/base_app_config.py @@ -8,7 +8,7 @@ from embedchain.vectordb.base import BaseVectorDB class BaseAppConfig(BaseConfig, JSONSerializable): """ - Parent config to initialize an instance of `App`, `OpenSourceApp` or `CustomApp`. + Parent config to initialize an instance of `App`. """ def __init__( diff --git a/embedchain/config/apps/custom_app_config.py b/embedchain/config/apps/custom_app_config.py deleted file mode 100644 index f8bc78f6..00000000 --- a/embedchain/config/apps/custom_app_config.py +++ /dev/null @@ -1,46 +0,0 @@ -from typing import Optional - -from dotenv import load_dotenv - -from embedchain.helper.json_serializable import register_deserializable -from embedchain.vectordb.base import BaseVectorDB - -from .base_app_config import BaseAppConfig - -load_dotenv() - - -@register_deserializable -class CustomAppConfig(BaseAppConfig): - """ - Config to initialize an embedchain custom `App` instance, with extra config options. - """ - - def __init__( - self, - log_level: str = "WARNING", - db: Optional[BaseVectorDB] = None, - id: Optional[str] = None, - collect_metrics: Optional[bool] = None, - collection_name: Optional[str] = None, - ): - """ - Initializes a configuration class instance for an Custom App. - Most of the configuration is done in the `CustomApp` class itself. - - :param log_level: Debug level ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], defaults to "WARNING" - :type log_level: str, optional - :param db: A database class. It is recommended to set this directly in the `CustomApp` class, not this config, - defaults to None - :type db: Optional[BaseVectorDB], optional - :param id: ID of the app. Document metadata will have this id., defaults to None - :type id: Optional[str], optional - :param collect_metrics: Send anonymous telemetry to improve embedchain, defaults to True - :type collect_metrics: Optional[bool], optional - :param collection_name: Default collection name. It's recommended to use app.db.set_collection_name() instead, - defaults to None - :type collection_name: Optional[str], optional - """ - super().__init__( - log_level=log_level, db=db, id=id, collect_metrics=collect_metrics, collection_name=collection_name - ) diff --git a/embedchain/config/apps/open_source_app_config.py b/embedchain/config/apps/open_source_app_config.py deleted file mode 100644 index f837c011..00000000 --- a/embedchain/config/apps/open_source_app_config.py +++ /dev/null @@ -1,40 +0,0 @@ -from typing import Optional - -from embedchain.helper.json_serializable import register_deserializable - -from .base_app_config import BaseAppConfig - - -@register_deserializable -class OpenSourceAppConfig(BaseAppConfig): - """ - Config to initialize an embedchain custom `OpenSourceApp` instance, with extra config options. - """ - - def __init__( - self, - log_level: str = "WARNING", - id: Optional[str] = None, - collect_metrics: Optional[bool] = None, - model: str = "orca-mini-3b.ggmlv3.q4_0.bin", - collection_name: Optional[str] = None, - ): - """ - Initializes a configuration class instance for an Open Source App. - - :param log_level: Debug level ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], defaults to "WARNING" - :type log_level: str, optional - :param id: ID of the app. Document metadata will have this id., defaults to None - :type id: Optional[str], optional - :param collect_metrics: Send anonymous telemetry to improve embedchain, defaults to True - :type collect_metrics: Optional[bool], optional - :param model: GPT4ALL uses the model to instantiate the class. - Unlike `App`, it has to be provided before querying, defaults to "orca-mini-3b.ggmlv3.q4_0.bin" - :type model: str, optional - :param collection_name: Default collection name. It's recommended to use app.db.set_collection_name() instead, - defaults to None - :type collection_name: Optional[str], optional - """ - self.model = model or "orca-mini-3b.ggmlv3.q4_0.bin" - - super().__init__(log_level=log_level, id=id, collect_metrics=collect_metrics, collection_name=collection_name) diff --git a/embedchain/llm/llama2.py b/embedchain/llm/llama2.py index 2e5c8c05..0c65ef53 100644 --- a/embedchain/llm/llama2.py +++ b/embedchain/llm/llama2.py @@ -40,7 +40,7 @@ class Llama2Llm(BaseLlm): def get_llm_model_answer(self, prompt): # TODO: Move the model and other inputs into config if self.config.system_prompt: - raise ValueError("Llama2App does not support `system_prompt`") + raise ValueError("Llama2 does not support `system_prompt`") llm = Replicate( model=self.config.model, input={ diff --git a/tests/apps/test_apps.py b/tests/apps/test_apps.py index d7744c07..1ffe2b20 100644 --- a/tests/apps/test_apps.py +++ b/tests/apps/test_apps.py @@ -3,7 +3,7 @@ import os import pytest import yaml -from embedchain import App, CustomApp, Llama2App, OpenSourceApp +from embedchain import App from embedchain.config import (AddConfig, AppConfig, BaseEmbedderConfig, BaseLlmConfig, ChromaDbConfig) from embedchain.embedder.base import BaseEmbedder @@ -18,49 +18,12 @@ def app(): return App() -@pytest.fixture -def custom_app(): - os.environ["OPENAI_API_KEY"] = "test_api_key" - return CustomApp() - - -@pytest.fixture -def opensource_app(): - os.environ["OPENAI_API_KEY"] = "test_api_key" - return OpenSourceApp() - - -@pytest.fixture -def llama2_app(): - os.environ["OPENAI_API_KEY"] = "test_api_key" - os.environ["REPLICATE_API_TOKEN"] = "-" - return Llama2App() - - def test_app(app): assert isinstance(app.llm, BaseLlm) assert isinstance(app.db, BaseVectorDB) assert isinstance(app.embedder, BaseEmbedder) -def test_custom_app(custom_app): - assert isinstance(custom_app.llm, BaseLlm) - assert isinstance(custom_app.db, BaseVectorDB) - assert isinstance(custom_app.embedder, BaseEmbedder) - - -def test_opensource_app(opensource_app): - assert isinstance(opensource_app.llm, BaseLlm) - assert isinstance(opensource_app.db, BaseVectorDB) - assert isinstance(opensource_app.embedder, BaseEmbedder) - - -def test_llama2_app(llama2_app): - assert isinstance(llama2_app.llm, BaseLlm) - assert isinstance(llama2_app.db, BaseVectorDB) - assert isinstance(llama2_app.embedder, BaseEmbedder) - - class TestConfigForAppComponents: def test_constructor_config(self): collection_name = "my-test-collection" diff --git a/tests/apps/test_person_app.py b/tests/apps/test_person_app.py deleted file mode 100644 index f57899a2..00000000 --- a/tests/apps/test_person_app.py +++ /dev/null @@ -1,81 +0,0 @@ -import pytest - -from embedchain.apps.app import App -from embedchain.apps.person_app import PersonApp, PersonOpenSourceApp -from embedchain.config import AppConfig, BaseLlmConfig -from embedchain.config.llm.base import DEFAULT_PROMPT - - -@pytest.fixture -def person_app(): - config = AppConfig() - return PersonApp("John Doe", config) - - -@pytest.fixture -def opensource_person_app(): - config = AppConfig() - return PersonOpenSourceApp("John Doe", config) - - -def test_person_app_initialization(person_app): - assert person_app.person == "John Doe" - assert f"You are {person_app.person}" in person_app.person_prompt - assert isinstance(person_app.config, AppConfig) - - -def test_person_app_add_person_template_to_config_with_invalid_template(): - app = PersonApp("John Doe") - default_prompt = "Input Prompt" - with pytest.raises(ValueError): - # as prompt doesn't contain $context and $query - app.add_person_template_to_config(default_prompt) - - -def test_person_app_add_person_template_to_config_with_valid_template(): - app = PersonApp("John Doe") - config = app.add_person_template_to_config(DEFAULT_PROMPT) - assert ( - config.template.template - == f"You are John Doe. Whatever you say, you will always say in John Doe style. {DEFAULT_PROMPT}" - ) - - -def test_person_app_query(mocker, person_app): - input_query = "Hello, how are you?" - config = BaseLlmConfig() - - mocker.patch.object(App, "query", return_value="Mocked response") - - result = person_app.query(input_query, config) - assert result == "Mocked response" - - -def test_person_app_chat(mocker, person_app): - input_query = "Hello, how are you?" - config = BaseLlmConfig() - - mocker.patch.object(App, "chat", return_value="Mocked chat response") - - result = person_app.chat(input_query, config) - assert result == "Mocked chat response" - - -def test_opensource_person_app_query(mocker, opensource_person_app): - input_query = "Hello, how are you?" - config = BaseLlmConfig() - - mocker.patch.object(App, "query", return_value="Mocked response") - - result = opensource_person_app.query(input_query, config) - assert result == "Mocked response" - - -def test_opensource_person_app_chat(mocker, opensource_person_app): - input_query = "Hello, how are you?" - config = BaseLlmConfig() - - mocker.patch.object(App, "chat", return_value="Mocked chat response") - - result = opensource_person_app.chat(input_query, config) - assert result == "Mocked chat response" diff --git a/tests/helper_classes/test_json_serializable.py b/tests/helper_classes/test_json_serializable.py index 5153d122..48345fdb 100644 --- a/tests/helper_classes/test_json_serializable.py +++ b/tests/helper_classes/test_json_serializable.py @@ -63,7 +63,6 @@ class TestJsonSerializable(unittest.TestCase): config = AppConfig(id=random_id, collect_metrics=False) # config class is set under app.config. app = App(config=config) - # w/o recursion it would just be s = app.serialize() new_app: App = App.deserialize(s) # The id of the new app is the same as the first one.