Remove person_app, open_source app, llama2_app with their configs (#829)

This commit is contained in:
Sidharth Mohanty
2023-10-19 13:43:52 +05:30
committed by GitHub
parent b7870fbd9b
commit b5d80be037
13 changed files with 3 additions and 480 deletions

View File

@@ -3,9 +3,4 @@ import importlib.metadata
__version__ = importlib.metadata.version(__package__ or __name__)
from embedchain.apps.app import App  # noqa: F401
from embedchain.apps.custom_app import CustomApp # noqa: F401
from embedchain.apps.Llama2App import Llama2App # noqa: F401
from embedchain.apps.open_source_app import OpenSourceApp # noqa: F401
from embedchain.apps.person_app import (PersonApp, # noqa: F401
PersonOpenSourceApp)
from embedchain.vectordb.chroma import ChromaDB  # noqa: F401

View File

@@ -1,38 +0,0 @@
import logging
from typing import Optional
from embedchain.apps.app import App
from embedchain.config import CustomAppConfig
from embedchain.helper.json_serializable import register_deserializable
from embedchain.llm.llama2 import Llama2Llm
@register_deserializable
class Llama2App(App):
    """
    The EmbedChain Llama2App class — a deprecated convenience wrapper that
    builds an ``App`` whose LLM is :class:`Llama2Llm`.

    Methods:
    add(source, data_type): adds the data from the given URL to the vector db.
    query(query): finds answer to the given query using vector database and LLM.
    chat(query): finds answer to the given query using vector database and LLM, with conversation history.

    .. deprecated:: 0.0.64
        Use `App` instead.
    """

    def __init__(self, config: CustomAppConfig = None, system_prompt: Optional[str] = None):
        """
        Create the deprecated Llama2 app.

        .. deprecated:: 0.0.64
            Use `App` instead.

        :param config: CustomAppConfig instance to load as configuration. Optional.
        :param system_prompt: System prompt string. Optional.
        """
        deprecation_notice = (
            "DEPRECATION WARNING: Please use `App` instead of `Llama2App`. "
            "`Llama2App` will be removed in a future release. "
            "Please refer to https://docs.embedchain.ai/advanced/app_types#llama2app for instructions."
        )
        logging.warning(deprecation_notice)
        # Delegate everything to App, pinning the LLM to Llama2.
        super().__init__(config=config, llm=Llama2Llm(), system_prompt=system_prompt)

View File

@@ -1,63 +0,0 @@
import logging
from typing import Optional
from embedchain.apps.app import App
from embedchain.config import CustomAppConfig
from embedchain.embedder.base import BaseEmbedder
from embedchain.helper.json_serializable import register_deserializable
from embedchain.llm.base import BaseLlm
from embedchain.vectordb.base import BaseVectorDB
@register_deserializable
class CustomApp(App):
    """
    Embedchain's custom app allows for most flexibility.

    You can craft your own mix of various LLMs, vector databases and embedding model/functions.

    Methods:
    add(source, data_type): adds the data from the given URL to the vector db.
    query(query): finds answer to the given query using vector database and LLM.
    chat(query): finds answer to the given query using vector database and LLM, with conversation history.

    .. deprecated:: 0.0.64
        Use `App` instead.
    """

    def __init__(
        self,
        config: Optional[CustomAppConfig] = None,
        llm: BaseLlm = None,
        db: BaseVectorDB = None,
        embedder: BaseEmbedder = None,
        system_prompt: Optional[str] = None,
    ):
        """
        Initialize a new `CustomApp` instance. You have to choose a LLM, database and embedder.

        .. deprecated:: 0.0.64
            Use `App` instead.

        :param config: Config for the app instance. This is the most basic configuration,
        that does not fall into the LLM, database or embedder category, defaults to None
        :type config: Optional[CustomAppConfig], optional
        :param llm: LLM Class instance. example: `from embedchain.llm.openai import OpenAILlm`, defaults to None
        :type llm: BaseLlm
        :param db: The database to use for storing and retrieving embeddings,
        example: `from embedchain.vectordb.chroma_db import ChromaDb`, defaults to None
        :type db: BaseVectorDB
        :param embedder: The embedder (embedding model and function) use to calculate embeddings.
        example: `from embedchain.embedder.gpt4all_embedder import GPT4AllEmbedder`, defaults to None
        :type embedder: BaseEmbedder
        :param system_prompt: System prompt that will be provided to the LLM as such, defaults to None
        :type system_prompt: Optional[str], optional
        :raises ValueError: LLM, database or embedder has not been defined.
        :raises TypeError: LLM, database or embedder is not a valid class instance.
        """
        # FIX: the deprecation notice previously pointed to the #opensourceapp
        # docs anchor (copy-paste error); CustomApp's migration instructions
        # live under #customapp.
        logging.warning(
            "DEPRECATION WARNING: Please use `App` instead of `CustomApp`. "
            "`CustomApp` will be removed in a future release. "
            "Please refer to https://docs.embedchain.ai/advanced/app_types#customapp for instructions."
        )
        super().__init__(config=config, llm=llm, db=db, embedder=embedder, system_prompt=system_prompt)

View File

@@ -1,71 +0,0 @@
import logging
from typing import Optional
from embedchain.apps.app import App
from embedchain.config import (BaseLlmConfig, ChromaDbConfig,
OpenSourceAppConfig)
from embedchain.embedder.gpt4all import GPT4AllEmbedder
from embedchain.helper.json_serializable import register_deserializable
from embedchain.llm.gpt4all import GPT4ALLLlm
from embedchain.vectordb.chroma import ChromaDB
gpt4all_model = None
@register_deserializable
class OpenSourceApp(App):
    """
    The embedchain Open Source App.
    Comes preconfigured with the best open source LLM, embedding model, database.

    Methods:
    add(source, data_type): adds the data from the given URL to the vector db.
    query(query): finds answer to the given query using vector database and LLM.
    chat(query): finds answer to the given query using vector database and LLM, with conversation history.

    .. deprecated:: 0.0.64
        Use `App` instead.
    """

    def __init__(
        self,
        config: OpenSourceAppConfig = None,
        llm_config: BaseLlmConfig = None,
        chromadb_config: Optional[ChromaDbConfig] = None,
        system_prompt: Optional[str] = None,
    ):
        """
        Initialize a new `OpenSourceApp` instance.
        Since it's opinionated you don't have to choose a LLM, database and embedder.
        However, you can configure those.

        .. deprecated:: 0.0.64
            Use `App` instead.

        :param config: Config for the app instance. This is the most basic configuration,
        that does not fall into the LLM, database or embedder category, defaults to None
        :type config: OpenSourceAppConfig, optional
        :param llm_config: Allows you to configure the LLM, e.g. how many documents to return.
        example: `from embedchain.config import BaseLlmConfig`, defaults to None
        :type llm_config: BaseLlmConfig, optional
        :param chromadb_config: Allows you to configure the open source database,
        example: `from embedchain.config import ChromaDbConfig`, defaults to None
        :type chromadb_config: Optional[ChromaDbConfig], optional
        :param system_prompt: System prompt that will be provided to the LLM as such.
        Please don't use for the time being, as it's not supported., defaults to None
        :type system_prompt: Optional[str], optional
        :raises TypeError: `OpenSourceAppConfig` or `BaseLlmConfig` invalid.
        """
        # FIX: the warning previously ran its sentences together (the
        # concatenated fragments had no separating spaces) and linked to the
        # #customapp docs anchor instead of #opensourceapp.
        logging.warning(
            "DEPRECATION WARNING: Please use `App` instead of `OpenSourceApp`. "
            "`OpenSourceApp` will be removed in a future release. "
            "Please refer to https://docs.embedchain.ai/advanced/app_types#opensourceapp for instructions."
        )
        super().__init__(
            config=config,
            llm=GPT4ALLLlm(config=llm_config),
            db=ChromaDB(config=chromadb_config),
            embedder=GPT4AllEmbedder(),
            system_prompt=system_prompt,
        )

View File

@@ -1,93 +0,0 @@
from string import Template
from embedchain.apps.app import App
from embedchain.apps.open_source_app import OpenSourceApp
from embedchain.config import AppConfig, BaseLlmConfig
from embedchain.config.llm.base import (DEFAULT_PROMPT,
DEFAULT_PROMPT_WITH_HISTORY)
from embedchain.helper.json_serializable import register_deserializable
@register_deserializable
class EmbedChainPersonApp:
    """
    Base class to create a person bot.
    This bot behaves and speaks like a person.

    :param person: name of the person, better if its a well known person.
    :param config: AppConfig instance to load as configuration.
    """

    def __init__(self, person: str, config: AppConfig = None):
        """Initialize a new person app.

        :param person: Name of the person that's imitated.
        :type person: str
        :param config: Configuration class instance, defaults to None
        :type config: AppConfig, optional
        """
        self.person = person
        self.person_prompt = f"You are {person}. Whatever you say, you will always say in {person} style."  # noqa:E501
        super().__init__(config)

    def add_person_template_to_config(self, default_prompt: str, config: BaseLlmConfig = None):
        """
        Produce a config whose prompt template is prefixed with the person prompt.

        If ``config`` carries a template already, that template is prefixed;
        if it carries none, the person-prefixed default is installed; with no
        config at all, a fresh ``BaseLlmConfig`` is built around the
        person-prefixed default.

        :param default_prompt: it is the default prompt for query or chat methods
        :type default_prompt: str
        :param config: _description_, defaults to None
        :type config: BaseLlmConfig, optional
        :return: The `ChatConfig` instance to use as configuration options.
        :rtype: _type_
        """
        combined = Template(f"{self.person_prompt} {default_prompt}")
        if config:
            user_template = config.template
            if user_template:
                # Prefix the person prompt onto the caller's custom template.
                config.template = Template(f"{self.person_prompt} {user_template.template}")
            else:
                # No user template present: person prompt + default template.
                config.template = combined
        else:
            # No config at all: build one around the combined template.
            config = BaseLlmConfig(template=combined)
        return config
@register_deserializable
class PersonApp(EmbedChainPersonApp, App):
    """
    The Person app.
    Extends functionality from EmbedChainPersonApp and App
    """

    def query(self, input_query, config: BaseLlmConfig = None, dry_run=False):
        """Answer ``input_query`` while speaking in the configured person's style."""
        person_config = self.add_person_template_to_config(DEFAULT_PROMPT, config)
        return super().query(input_query, person_config, dry_run, where=None)

    def chat(self, input_query, config: BaseLlmConfig = None, dry_run=False, where=None):
        """Chat (with conversation history) in the configured person's style."""
        person_config = self.add_person_template_to_config(DEFAULT_PROMPT_WITH_HISTORY, config)
        return super().chat(input_query, person_config, dry_run, where)
@register_deserializable
class PersonOpenSourceApp(EmbedChainPersonApp, OpenSourceApp):
    """
    The Person app.
    Extends functionality from EmbedChainPersonApp and OpenSourceApp
    """

    def query(self, input_query, config: BaseLlmConfig = None, dry_run=False):
        """Answer ``input_query`` while speaking in the configured person's style."""
        person_config = self.add_person_template_to_config(DEFAULT_PROMPT, config)
        return super().query(input_query, person_config, dry_run)

    def chat(self, input_query, config: BaseLlmConfig = None, dry_run=False):
        """Chat (with conversation history) in the configured person's style."""
        person_config = self.add_person_template_to_config(DEFAULT_PROMPT_WITH_HISTORY, config)
        return super().chat(input_query, person_config, dry_run)

View File

@@ -2,8 +2,6 @@
from .add_config import AddConfig, ChunkerConfig from .add_config import AddConfig, ChunkerConfig
from .apps.app_config import AppConfig from .apps.app_config import AppConfig
from .apps.custom_app_config import CustomAppConfig
from .apps.open_source_app_config import OpenSourceAppConfig
from .base_config import BaseConfig from .base_config import BaseConfig
from .embedder.base import BaseEmbedderConfig from .embedder.base import BaseEmbedderConfig
from .embedder.base import BaseEmbedderConfig as EmbedderConfig from .embedder.base import BaseEmbedderConfig as EmbedderConfig

View File

@@ -8,7 +8,7 @@ from embedchain.vectordb.base import BaseVectorDB
class BaseAppConfig(BaseConfig, JSONSerializable): class BaseAppConfig(BaseConfig, JSONSerializable):
""" """
Parent config to initialize an instance of `App`, `OpenSourceApp` or `CustomApp`. Parent config to initialize an instance of `App`.
""" """
def __init__( def __init__(

View File

@@ -1,46 +0,0 @@
from typing import Optional
from dotenv import load_dotenv
from embedchain.helper.json_serializable import register_deserializable
from embedchain.vectordb.base import BaseVectorDB
from .base_app_config import BaseAppConfig
load_dotenv()
@register_deserializable
class CustomAppConfig(BaseAppConfig):
    """
    Config to initialize an embedchain custom `App` instance, with extra config options.
    """

    def __init__(
        self,
        log_level: str = "WARNING",
        db: Optional[BaseVectorDB] = None,
        id: Optional[str] = None,
        collect_metrics: Optional[bool] = None,
        collection_name: Optional[str] = None,
    ):
        """
        Initializes a configuration class instance for an Custom App.
        Most of the configuration is done in the `CustomApp` class itself.

        :param log_level: Debug level ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], defaults to "WARNING"
        :type log_level: str, optional
        :param db: A database class. It is recommended to set this directly in the `CustomApp` class, not this config,
        defaults to None
        :type db: Optional[BaseVectorDB], optional
        :param id: ID of the app. Document metadata will have this id., defaults to None
        :type id: Optional[str], optional
        :param collect_metrics: Send anonymous telemetry to improve embedchain, defaults to True
        :type collect_metrics: Optional[bool], optional
        :param collection_name: Default collection name. It's recommended to use app.db.set_collection_name() instead,
        defaults to None
        :type collection_name: Optional[str], optional
        """
        # Everything is handled by the shared BaseAppConfig initializer.
        super().__init__(
            log_level=log_level,
            db=db,
            id=id,
            collect_metrics=collect_metrics,
            collection_name=collection_name,
        )

View File

@@ -1,40 +0,0 @@
from typing import Optional
from embedchain.helper.json_serializable import register_deserializable
from .base_app_config import BaseAppConfig
@register_deserializable
class OpenSourceAppConfig(BaseAppConfig):
    """
    Config to initialize an embedchain custom `OpenSourceApp` instance, with extra config options.
    """

    def __init__(
        self,
        log_level: str = "WARNING",
        id: Optional[str] = None,
        collect_metrics: Optional[bool] = None,
        model: str = "orca-mini-3b.ggmlv3.q4_0.bin",
        collection_name: Optional[str] = None,
    ):
        """
        Initializes a configuration class instance for an Open Source App.

        :param log_level: Debug level ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], defaults to "WARNING"
        :type log_level: str, optional
        :param id: ID of the app. Document metadata will have this id., defaults to None
        :type id: Optional[str], optional
        :param collect_metrics: Send anonymous telemetry to improve embedchain, defaults to True
        :type collect_metrics: Optional[bool], optional
        :param model: GPT4ALL uses the model to instantiate the class.
        Unlike `App`, it has to be provided before querying, defaults to "orca-mini-3b.ggmlv3.q4_0.bin"
        :type model: str, optional
        :param collection_name: Default collection name. It's recommended to use app.db.set_collection_name() instead,
        defaults to None
        :type collection_name: Optional[str], optional
        """
        # Guard against an explicitly falsy model (e.g. model=None): fall back
        # to the default GPT4All weights file.
        self.model = model if model else "orca-mini-3b.ggmlv3.q4_0.bin"
        super().__init__(
            log_level=log_level,
            id=id,
            collect_metrics=collect_metrics,
            collection_name=collection_name,
        )

View File

@@ -40,7 +40,7 @@ class Llama2Llm(BaseLlm):
def get_llm_model_answer(self, prompt): def get_llm_model_answer(self, prompt):
# TODO: Move the model and other inputs into config # TODO: Move the model and other inputs into config
if self.config.system_prompt: if self.config.system_prompt:
raise ValueError("Llama2App does not support `system_prompt`") raise ValueError("Llama2 does not support `system_prompt`")
llm = Replicate( llm = Replicate(
model=self.config.model, model=self.config.model,
input={ input={

View File

@@ -3,7 +3,7 @@ import os
import pytest import pytest
import yaml import yaml
from embedchain import App
from embedchain.config import (AddConfig, AppConfig, BaseEmbedderConfig, from embedchain.config import (AddConfig, AppConfig, BaseEmbedderConfig,
BaseLlmConfig, ChromaDbConfig) BaseLlmConfig, ChromaDbConfig)
from embedchain.embedder.base import BaseEmbedder from embedchain.embedder.base import BaseEmbedder
@@ -18,49 +18,12 @@ def app():
return App() return App()
@pytest.fixture
def custom_app():
    # The key only needs to be present; no real OpenAI request is issued.
    os.environ["OPENAI_API_KEY"] = "test_api_key"
    return CustomApp()


@pytest.fixture
def opensource_app():
    os.environ["OPENAI_API_KEY"] = "test_api_key"
    return OpenSourceApp()


@pytest.fixture
def llama2_app():
    # Llama2 is served via Replicate, so a (dummy) token must be set as well.
    os.environ["OPENAI_API_KEY"] = "test_api_key"
    os.environ["REPLICATE_API_TOKEN"] = "-"
    return Llama2App()
def test_app(app): def test_app(app):
assert isinstance(app.llm, BaseLlm) assert isinstance(app.llm, BaseLlm)
assert isinstance(app.db, BaseVectorDB) assert isinstance(app.db, BaseVectorDB)
assert isinstance(app.embedder, BaseEmbedder) assert isinstance(app.embedder, BaseEmbedder)
def test_custom_app(custom_app):
    # Every app flavour must wire up the three core components.
    for component, base in (("llm", BaseLlm), ("db", BaseVectorDB), ("embedder", BaseEmbedder)):
        assert isinstance(getattr(custom_app, component), base)


def test_opensource_app(opensource_app):
    for component, base in (("llm", BaseLlm), ("db", BaseVectorDB), ("embedder", BaseEmbedder)):
        assert isinstance(getattr(opensource_app, component), base)


def test_llama2_app(llama2_app):
    for component, base in (("llm", BaseLlm), ("db", BaseVectorDB), ("embedder", BaseEmbedder)):
        assert isinstance(getattr(llama2_app, component), base)
class TestConfigForAppComponents: class TestConfigForAppComponents:
def test_constructor_config(self): def test_constructor_config(self):
collection_name = "my-test-collection" collection_name = "my-test-collection"

View File

@@ -1,81 +0,0 @@
import pytest
from embedchain.apps.app import App
from embedchain.apps.person_app import PersonApp, PersonOpenSourceApp
from embedchain.config import AppConfig, BaseLlmConfig
from embedchain.config.llm.base import DEFAULT_PROMPT
@pytest.fixture
def person_app():
    # A bot imitating a fixed, well-known name keeps the assertions simple.
    return PersonApp("John Doe", AppConfig())


@pytest.fixture
def opensource_person_app():
    return PersonOpenSourceApp("John Doe", AppConfig())
def test_person_app_initialization(person_app):
    # The fixture's name must flow into both the attribute and the prompt.
    assert person_app.person == "John Doe"
    assert f"You are {person_app.person}" in person_app.person_prompt
    assert isinstance(person_app.config, AppConfig)


def test_person_app_add_person_template_to_config_with_invalid_template():
    # "Input Prompt" lacks the required $context and $query placeholders.
    with pytest.raises(ValueError):
        PersonApp("John Doe").add_person_template_to_config("Input Prompt")


def test_person_app_add_person_template_to_config_with_valid_template():
    produced = PersonApp("John Doe").add_person_template_to_config(DEFAULT_PROMPT)
    expected = f"You are John Doe. Whatever you say, you will always say in John Doe style. {DEFAULT_PROMPT}"
    assert produced.template.template == expected
def test_person_app_query(mocker, person_app):
    # Patch App.query so no LLM/vector-db work actually happens.
    mocker.patch.object(App, "query", return_value="Mocked response")
    assert person_app.query("Hello, how are you?", BaseLlmConfig()) == "Mocked response"


def test_person_app_chat(mocker, person_app):
    mocker.patch.object(App, "chat", return_value="Mocked chat response")
    assert person_app.chat("Hello, how are you?", BaseLlmConfig()) == "Mocked chat response"


def test_opensource_person_app_query(mocker, opensource_person_app):
    mocker.patch.object(App, "query", return_value="Mocked response")
    assert opensource_person_app.query("Hello, how are you?", BaseLlmConfig()) == "Mocked response"


def test_opensource_person_app_chat(mocker, opensource_person_app):
    mocker.patch.object(App, "chat", return_value="Mocked chat response")
    assert opensource_person_app.chat("Hello, how are you?", BaseLlmConfig()) == "Mocked chat response"

View File

@@ -63,7 +63,6 @@ class TestJsonSerializable(unittest.TestCase):
config = AppConfig(id=random_id, collect_metrics=False) config = AppConfig(id=random_id, collect_metrics=False)
# config class is set under app.config. # config class is set under app.config.
app = App(config=config) app = App(config=config)
# w/o recursion it would just be <embedchain.config.apps.OpenSourceAppConfig.OpenSourceAppConfig object at x>
s = app.serialize() s = app.serialize()
new_app: App = App.deserialize(s) new_app: App = App.deserialize(s)
# The id of the new app is the same as the first one. # The id of the new app is the same as the first one.