Remove person_app, open_source_app, custom_app, llama2_app with their configs (#829)

2023-10-19 13:43:52 +05:30
parent b7870fbd9b
commit b5d80be037
13 changed files with 3 additions and 480 deletions
--- a/embedchain/__init__.py
+++ b/embedchain/__init__.py
@@ -3,9 +3,4 @@ import importlib.metadata
 __version__ = importlib.metadata.version(__package__ or __name__)

 from embedchain.apps.app import App  # noqa: F401
-from embedchain.apps.custom_app import CustomApp  # noqa: F401
-from embedchain.apps.Llama2App import Llama2App  # noqa: F401
-from embedchain.apps.open_source_app import OpenSourceApp  # noqa: F401
-from embedchain.apps.person_app import (PersonApp,  # noqa: F401
-                                        PersonOpenSourceApp)
 from embedchain.vectordb.chroma import ChromaDB  # noqa: F401
--- a/embedchain/apps/Llama2App.py
+++ b/embedchain/apps/Llama2App.py
@@ -1,38 +0,0 @@
-import logging
-from typing import Optional
-
-from embedchain.apps.app import App
-from embedchain.config import CustomAppConfig
-from embedchain.helper.json_serializable import register_deserializable
-from embedchain.llm.llama2 import Llama2Llm
-
-
-@register_deserializable
-class Llama2App(App):
-    """
-    The EmbedChain Llama2App class.
-
-    Methods:
-    add(source, data_type): adds the data from the given URL to the vector db.
-    query(query): finds answer to the given query using vector database and LLM.
-    chat(query): finds answer to the given query using vector database and LLM, with conversation history.
-
-    .. deprecated:: 0.0.64
-    Use `App` instead.
-    """
-
-    def __init__(self, config: CustomAppConfig = None, system_prompt: Optional[str] = None):
-        """
-        .. deprecated:: 0.0.64
-        Use `App` instead.
-
-        :param config: CustomAppConfig instance to load as configuration. Optional.
-        :param system_prompt: System prompt string. Optional.
-        """
-        logging.warning(
-            "DEPRECATION WARNING: Please use `App` instead of `Llama2App`. "
-            "`Llama2App` will be removed in a future release. "
-            "Please refer to https://docs.embedchain.ai/advanced/app_types#llama2app for instructions."
-        )
-
-        super().__init__(config=config, llm=Llama2Llm(), system_prompt=system_prompt)
--- a/embedchain/apps/custom_app.py
+++ b/embedchain/apps/custom_app.py
@@ -1,63 +0,0 @@
-import logging
-from typing import Optional
-
-from embedchain.apps.app import App
-from embedchain.config import CustomAppConfig
-from embedchain.embedder.base import BaseEmbedder
-from embedchain.helper.json_serializable import register_deserializable
-from embedchain.llm.base import BaseLlm
-from embedchain.vectordb.base import BaseVectorDB
-
-
-@register_deserializable
-class CustomApp(App):
-    """
-    Embedchain's custom app allows for most flexibility.
-
-    You can craft your own mix of various LLMs, vector databases and embedding model/functions.
-
-    Methods:
-    add(source, data_type): adds the data from the given URL to the vector db.
-    query(query): finds answer to the given query using vector database and LLM.
-    chat(query): finds answer to the given query using vector database and LLM, with conversation history.
-
-    .. deprecated:: 0.0.64
-        Use `App` instead.
-    """
-
-    def __init__(
-        self,
-        config: Optional[CustomAppConfig] = None,
-        llm: BaseLlm = None,
-        db: BaseVectorDB = None,
-        embedder: BaseEmbedder = None,
-        system_prompt: Optional[str] = None,
-    ):
-        """
-        Initialize a new `CustomApp` instance. You have to choose a LLM, database and embedder.
-
-        .. deprecated:: 0.0.64
-        Use `App` instead.
-
-        :param config: Config for the app instance. This is the most basic configuration,
-        that does not fall into the LLM, database or embedder category, defaults to None
-        :type config: Optional[CustomAppConfig], optional
-        :param llm: LLM Class instance. example: `from embedchain.llm.openai import OpenAILlm`, defaults to None
-        :type llm: BaseLlm
-        :param db: The database to use for storing and retrieving embeddings,
-        example: `from embedchain.vectordb.chroma_db import ChromaDb`, defaults to None
-        :type db: BaseVectorDB
-        :param embedder: The embedder (embedding model and function) use to calculate embeddings.
-        example: `from embedchain.embedder.gpt4all_embedder import GPT4AllEmbedder`, defaults to None
-        :type embedder: BaseEmbedder
-        :param system_prompt: System prompt that will be provided to the LLM as such, defaults to None
-        :type system_prompt: Optional[str], optional
-        :raises ValueError: LLM, database or embedder has not been defined.
-        :raises TypeError: LLM, database or embedder is not a valid class instance.
-        """
-        logging.warning(
-            "DEPRECATION WARNING: Please use `App` instead of `CustomApp`. "
-            "`CustomApp` will be removed in a future release. "
-            "Please refer to https://docs.embedchain.ai/advanced/app_types#opensourceapp for instructions."
-        )
-        super().__init__(config=config, llm=llm, db=db, embedder=embedder, system_prompt=system_prompt)
--- a/embedchain/apps/open_source_app.py
+++ b/embedchain/apps/open_source_app.py
@@ -1,71 +0,0 @@
-import logging
-from typing import Optional
-
-from embedchain.apps.app import App
-from embedchain.config import (BaseLlmConfig, ChromaDbConfig,
-                               OpenSourceAppConfig)
-from embedchain.embedder.gpt4all import GPT4AllEmbedder
-from embedchain.helper.json_serializable import register_deserializable
-from embedchain.llm.gpt4all import GPT4ALLLlm
-from embedchain.vectordb.chroma import ChromaDB
-
-gpt4all_model = None
-
-
-@register_deserializable
-class OpenSourceApp(App):
-    """
-    The embedchain Open Source App.
-    Comes preconfigured with the best open source LLM, embedding model, database.
-
-    Methods:
-    add(source, data_type): adds the data from the given URL to the vector db.
-    query(query): finds answer to the given query using vector database and LLM.
-    chat(query): finds answer to the given query using vector database and LLM, with conversation history.
-
-    .. deprecated:: 0.0.64
-    Use `App` instead.
-    """
-
-    def __init__(
-        self,
-        config: OpenSourceAppConfig = None,
-        llm_config: BaseLlmConfig = None,
-        chromadb_config: Optional[ChromaDbConfig] = None,
-        system_prompt: Optional[str] = None,
-    ):
-        """
-        Initialize a new `CustomApp` instance.
-        Since it's opinionated you don't have to choose a LLM, database and embedder.
-        However, you can configure those.
-
-        .. deprecated:: 0.0.64
-        Use `App` instead.
-
-        :param config: Config for the app instance. This is the most basic configuration,
-        that does not fall into the LLM, database or embedder category, defaults to None
-        :type config: OpenSourceAppConfig, optional
-        :param llm_config: Allows you to configure the LLM, e.g. how many documents to return.
-        example: `from embedchain.config import BaseLlmConfig`, defaults to None
-        :type llm_config: BaseLlmConfig, optional
-        :param chromadb_config: Allows you to configure the open source database,
-        example: `from embedchain.config import ChromaDbConfig`, defaults to None
-        :type chromadb_config: Optional[ChromaDbConfig], optional
-        :param system_prompt: System prompt that will be provided to the LLM as such.
-        Please don't use for the time being, as it's not supported., defaults to None
-        :type system_prompt: Optional[str], optional
-        :raises TypeError: `OpenSourceAppConfig` or `BaseLlmConfig` invalid.
-        """
-        logging.warning(
-            "DEPRECATION WARNING: Please use `App` instead of `OpenSourceApp`."
-            "`OpenSourceApp` will be removed in a future release."
-            "Please refer to https://docs.embedchain.ai/advanced/app_types#customapp for instructions."
-        )
-
-        super().__init__(
-            config=config,
-            llm=GPT4ALLLlm(config=llm_config),
-            db=ChromaDB(config=chromadb_config),
-            embedder=GPT4AllEmbedder(),
-            system_prompt=system_prompt,
-        )
--- a/embedchain/apps/person_app.py
+++ b/embedchain/apps/person_app.py
@@ -1,93 +0,0 @@
-from string import Template
-
-from embedchain.apps.app import App
-from embedchain.apps.open_source_app import OpenSourceApp
-from embedchain.config import AppConfig, BaseLlmConfig
-from embedchain.config.llm.base import (DEFAULT_PROMPT,
-                                        DEFAULT_PROMPT_WITH_HISTORY)
-from embedchain.helper.json_serializable import register_deserializable
-
-
-@register_deserializable
-class EmbedChainPersonApp:
-    """
-    Base class to create a person bot.
-    This bot behaves and speaks like a person.
-
-    :param person: name of the person, better if its a well known person.
-    :param config: AppConfig instance to load as configuration.
-    """
-
-    def __init__(self, person: str, config: AppConfig = None):
-        """Initialize a new person app
-
-        :param person: Name of the person that's imitated.
-        :type person: str
-        :param config: Configuration class instance, defaults to None
-        :type config: AppConfig, optional
-        """
-        self.person = person
-        self.person_prompt = f"You are {person}. Whatever you say, you will always say in {person} style."  # noqa:E501
-        super().__init__(config)
-
-    def add_person_template_to_config(self, default_prompt: str, config: BaseLlmConfig = None):
-        """
-        This method checks if the config object contains a prompt template
-        if yes it adds the person prompt to it and return the updated config
-        else it creates a config object with the default prompt added to the person prompt
-
-        :param default_prompt:  it is the default prompt for query or chat methods
-        :type default_prompt: str
-        :param config: _description_, defaults to None
-        :type config: BaseLlmConfig, optional
-        :return: The `ChatConfig` instance to use as configuration options.
-        :rtype: _type_
-        """
-        template = Template(self.person_prompt + " " + default_prompt)
-
-        if config:
-            if config.template:
-                # Add person prompt to custom user template
-                config.template = Template(self.person_prompt + " " + config.template.template)
-            else:
-                # If no user template is present, use person prompt with the default template
-                config.template = template
-        else:
-            # if no config is present at all, initialize the config with person prompt and default template
-            config = BaseLlmConfig(
-                template=template,
-            )
-
-        return config
-
-
-@register_deserializable
-class PersonApp(EmbedChainPersonApp, App):
-    """
-    The Person app.
-    Extends functionality from EmbedChainPersonApp and App
-    """
-
-    def query(self, input_query, config: BaseLlmConfig = None, dry_run=False):
-        config = self.add_person_template_to_config(DEFAULT_PROMPT, config)
-        return super().query(input_query, config, dry_run, where=None)
-
-    def chat(self, input_query, config: BaseLlmConfig = None, dry_run=False, where=None):
-        config = self.add_person_template_to_config(DEFAULT_PROMPT_WITH_HISTORY, config)
-        return super().chat(input_query, config, dry_run, where)
-
-
-@register_deserializable
-class PersonOpenSourceApp(EmbedChainPersonApp, OpenSourceApp):
-    """
-    The Person app.
-    Extends functionality from EmbedChainPersonApp and OpenSourceApp
-    """
-
-    def query(self, input_query, config: BaseLlmConfig = None, dry_run=False):
-        config = self.add_person_template_to_config(DEFAULT_PROMPT, config)
-        return super().query(input_query, config, dry_run)
-
-    def chat(self, input_query, config: BaseLlmConfig = None, dry_run=False):
-        config = self.add_person_template_to_config(DEFAULT_PROMPT_WITH_HISTORY, config)
-        return super().chat(input_query, config, dry_run)
--- a/embedchain/config/__init__.py
+++ b/embedchain/config/__init__.py
@@ -2,8 +2,6 @@

 from .add_config import AddConfig, ChunkerConfig
 from .apps.app_config import AppConfig
-from .apps.custom_app_config import CustomAppConfig
-from .apps.open_source_app_config import OpenSourceAppConfig
 from .base_config import BaseConfig
 from .embedder.base import BaseEmbedderConfig
 from .embedder.base import BaseEmbedderConfig as EmbedderConfig
--- a/embedchain/config/apps/base_app_config.py
+++ b/embedchain/config/apps/base_app_config.py
@@ -8,7 +8,7 @@ from embedchain.vectordb.base import BaseVectorDB

 class BaseAppConfig(BaseConfig, JSONSerializable):
    """
-    Parent config to initialize an instance of `App`, `OpenSourceApp` or `CustomApp`.
+    Parent config to initialize an instance of `App`.
    """

    def __init__(
--- a/embedchain/config/apps/custom_app_config.py
+++ b/embedchain/config/apps/custom_app_config.py
@@ -1,46 +0,0 @@
-from typing import Optional
-
-from dotenv import load_dotenv
-
-from embedchain.helper.json_serializable import register_deserializable
-from embedchain.vectordb.base import BaseVectorDB
-
-from .base_app_config import BaseAppConfig
-
-load_dotenv()
-
-
-@register_deserializable
-class CustomAppConfig(BaseAppConfig):
-    """
-    Config to initialize an embedchain custom `App` instance, with extra config options.
-    """
-
-    def __init__(
-        self,
-        log_level: str = "WARNING",
-        db: Optional[BaseVectorDB] = None,
-        id: Optional[str] = None,
-        collect_metrics: Optional[bool] = None,
-        collection_name: Optional[str] = None,
-    ):
-        """
-        Initializes a configuration class instance for an Custom App.
-        Most of the configuration is done in the `CustomApp` class itself.
-
-        :param log_level: Debug level ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], defaults to "WARNING"
-        :type log_level: str, optional
-        :param db: A database class. It is recommended to set this directly in the `CustomApp` class, not this config,
-        defaults to None
-        :type db: Optional[BaseVectorDB], optional
-        :param id: ID of the app. Document metadata will have this id., defaults to None
-        :type id: Optional[str], optional
-        :param collect_metrics: Send anonymous telemetry to improve embedchain, defaults to True
-        :type collect_metrics: Optional[bool], optional
-        :param collection_name: Default collection name. It's recommended to use app.db.set_collection_name() instead,
-        defaults to None
-        :type collection_name: Optional[str], optional
-        """
-        super().__init__(
-            log_level=log_level, db=db, id=id, collect_metrics=collect_metrics, collection_name=collection_name
-        )
--- a/embedchain/config/apps/open_source_app_config.py
+++ b/embedchain/config/apps/open_source_app_config.py
@@ -1,40 +0,0 @@
-from typing import Optional
-
-from embedchain.helper.json_serializable import register_deserializable
-
-from .base_app_config import BaseAppConfig
-
-
-@register_deserializable
-class OpenSourceAppConfig(BaseAppConfig):
-    """
-    Config to initialize an embedchain custom `OpenSourceApp` instance, with extra config options.
-    """
-
-    def __init__(
-        self,
-        log_level: str = "WARNING",
-        id: Optional[str] = None,
-        collect_metrics: Optional[bool] = None,
-        model: str = "orca-mini-3b.ggmlv3.q4_0.bin",
-        collection_name: Optional[str] = None,
-    ):
-        """
-        Initializes a configuration class instance for an Open Source App.
-
-        :param log_level: Debug level ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], defaults to "WARNING"
-        :type log_level: str, optional
-        :param id: ID of the app. Document metadata will have this id., defaults to None
-        :type id: Optional[str], optional
-        :param collect_metrics: Send anonymous telemetry to improve embedchain, defaults to True
-        :type collect_metrics: Optional[bool], optional
-        :param model: GPT4ALL uses the model to instantiate the class.
-        Unlike `App`, it has to be provided before querying, defaults to "orca-mini-3b.ggmlv3.q4_0.bin"
-        :type model: str, optional
-        :param collection_name: Default collection name. It's recommended to use app.db.set_collection_name() instead,
-        defaults to None
-        :type collection_name: Optional[str], optional
-        """
-        self.model = model or "orca-mini-3b.ggmlv3.q4_0.bin"
-
-        super().__init__(log_level=log_level, id=id, collect_metrics=collect_metrics, collection_name=collection_name)
--- a/embedchain/llm/llama2.py
+++ b/embedchain/llm/llama2.py
@@ -40,7 +40,7 @@ class Llama2Llm(BaseLlm):
    def get_llm_model_answer(self, prompt):
        # TODO: Move the model and other inputs into config
        if self.config.system_prompt:
-            raise ValueError("Llama2App does not support `system_prompt`")
+            raise ValueError("Llama2 does not support `system_prompt`")
        llm = Replicate(
            model=self.config.model,
            input={
--- a/tests/apps/test_apps.py
+++ b/tests/apps/test_apps.py
@@ -3,7 +3,7 @@ import os
 import pytest
 import yaml

-from embedchain import App, CustomApp, Llama2App, OpenSourceApp
+from embedchain import App
 from embedchain.config import (AddConfig, AppConfig, BaseEmbedderConfig,
                               BaseLlmConfig, ChromaDbConfig)
 from embedchain.embedder.base import BaseEmbedder
@@ -18,49 +18,12 @@ def app():
    return App()


-@pytest.fixture
-def custom_app():
-    os.environ["OPENAI_API_KEY"] = "test_api_key"
-    return CustomApp()
-
-
-@pytest.fixture
-def opensource_app():
-    os.environ["OPENAI_API_KEY"] = "test_api_key"
-    return OpenSourceApp()
-
-
-@pytest.fixture
-def llama2_app():
-    os.environ["OPENAI_API_KEY"] = "test_api_key"
-    os.environ["REPLICATE_API_TOKEN"] = "-"
-    return Llama2App()
-
-
 def test_app(app):
    assert isinstance(app.llm, BaseLlm)
    assert isinstance(app.db, BaseVectorDB)
    assert isinstance(app.embedder, BaseEmbedder)


-def test_custom_app(custom_app):
-    assert isinstance(custom_app.llm, BaseLlm)
-    assert isinstance(custom_app.db, BaseVectorDB)
-    assert isinstance(custom_app.embedder, BaseEmbedder)
-
-
-def test_opensource_app(opensource_app):
-    assert isinstance(opensource_app.llm, BaseLlm)
-    assert isinstance(opensource_app.db, BaseVectorDB)
-    assert isinstance(opensource_app.embedder, BaseEmbedder)
-
-
-def test_llama2_app(llama2_app):
-    assert isinstance(llama2_app.llm, BaseLlm)
-    assert isinstance(llama2_app.db, BaseVectorDB)
-    assert isinstance(llama2_app.embedder, BaseEmbedder)
-
-
 class TestConfigForAppComponents:
    def test_constructor_config(self):
        collection_name = "my-test-collection"
--- a/tests/apps/test_person_app.py
+++ b/tests/apps/test_person_app.py
@@ -1,81 +0,0 @@
-import pytest
-
-from embedchain.apps.app import App
-from embedchain.apps.person_app import PersonApp, PersonOpenSourceApp
-from embedchain.config import AppConfig, BaseLlmConfig
-from embedchain.config.llm.base import DEFAULT_PROMPT
-
-
-@pytest.fixture
-def person_app():
-    config = AppConfig()
-    return PersonApp("John Doe", config)
-
-
-@pytest.fixture
-def opensource_person_app():
-    config = AppConfig()
-    return PersonOpenSourceApp("John Doe", config)
-
-
-def test_person_app_initialization(person_app):
-    assert person_app.person == "John Doe"
-    assert f"You are {person_app.person}" in person_app.person_prompt
-    assert isinstance(person_app.config, AppConfig)
-
-
-def test_person_app_add_person_template_to_config_with_invalid_template():
-    app = PersonApp("John Doe")
-    default_prompt = "Input Prompt"
-    with pytest.raises(ValueError):
-        # as prompt doesn't contain $context and $query
-        app.add_person_template_to_config(default_prompt)
-
-
-def test_person_app_add_person_template_to_config_with_valid_template():
-    app = PersonApp("John Doe")
-    config = app.add_person_template_to_config(DEFAULT_PROMPT)
-    assert (
-        config.template.template
-        == f"You are John Doe. Whatever you say, you will always say in John Doe style. {DEFAULT_PROMPT}"
-    )
-
-
-def test_person_app_query(mocker, person_app):
-    input_query = "Hello, how are you?"
-    config = BaseLlmConfig()
-
-    mocker.patch.object(App, "query", return_value="Mocked response")
-
-    result = person_app.query(input_query, config)
-    assert result == "Mocked response"
-
-
-def test_person_app_chat(mocker, person_app):
-    input_query = "Hello, how are you?"
-    config = BaseLlmConfig()
-
-    mocker.patch.object(App, "chat", return_value="Mocked chat response")
-
-    result = person_app.chat(input_query, config)
-    assert result == "Mocked chat response"
-
-
-def test_opensource_person_app_query(mocker, opensource_person_app):
-    input_query = "Hello, how are you?"
-    config = BaseLlmConfig()
-
-    mocker.patch.object(App, "query", return_value="Mocked response")
-
-    result = opensource_person_app.query(input_query, config)
-    assert result == "Mocked response"
-
-
-def test_opensource_person_app_chat(mocker, opensource_person_app):
-    input_query = "Hello, how are you?"
-    config = BaseLlmConfig()
-
-    mocker.patch.object(App, "chat", return_value="Mocked chat response")
-
-    result = opensource_person_app.chat(input_query, config)
-    assert result == "Mocked chat response"
--- a/tests/helper_classes/test_json_serializable.py
+++ b/tests/helper_classes/test_json_serializable.py
@@ -63,7 +63,6 @@ class TestJsonSerializable(unittest.TestCase):
        config = AppConfig(id=random_id, collect_metrics=False)
        # config class is set under app.config.
        app = App(config=config)
-        # w/o recursion it would just be <embedchain.config.apps.OpenSourceAppConfig.OpenSourceAppConfig object at x>
        s = app.serialize()
        new_app: App = App.deserialize(s)
        # The id of the new app is the same as the first one.