Remove person_app, open_source app, llama2_app with their configs (#829)

This commit is contained in:
Sidharth Mohanty
2023-10-19 13:43:52 +05:30
committed by GitHub
parent b7870fbd9b
commit b5d80be037
13 changed files with 3 additions and 480 deletions

View File

@@ -3,9 +3,4 @@ import importlib.metadata
__version__ = importlib.metadata.version(__package__ or __name__)
from embedchain.apps.app import App # noqa: F401
from embedchain.apps.custom_app import CustomApp # noqa: F401
from embedchain.apps.Llama2App import Llama2App # noqa: F401
from embedchain.apps.open_source_app import OpenSourceApp # noqa: F401
from embedchain.apps.person_app import (PersonApp, # noqa: F401
PersonOpenSourceApp)
from embedchain.vectordb.chroma import ChromaDB # noqa: F401

View File

@@ -1,38 +0,0 @@
import logging
from typing import Optional
from embedchain.apps.app import App
from embedchain.config import CustomAppConfig
from embedchain.helper.json_serializable import register_deserializable
from embedchain.llm.llama2 import Llama2Llm
@register_deserializable
class Llama2App(App):
    """
    Deprecated `App` variant that is always wired to a `Llama2Llm`.

    Only the constructor is overridden here; every other method comes from `App`.

    .. deprecated:: 0.0.64
        Use `App` instead.
    """

    def __init__(self, config: CustomAppConfig = None, system_prompt: Optional[str] = None):
        """
        Log a deprecation warning, then initialise `App` with a new `Llama2Llm`.

        .. deprecated:: 0.0.64
            Use `App` instead.

        :param config: App-level configuration forwarded to `App`, defaults to None
        :type config: CustomAppConfig, optional
        :param system_prompt: System prompt forwarded to `App`, defaults to None
        :type system_prompt: Optional[str], optional
        """
        deprecation_notice = (
            "DEPRECATION WARNING: Please use `App` instead of `Llama2App`. "
            "`Llama2App` will be removed in a future release. "
            "Please refer to https://docs.embedchain.ai/advanced/app_types#llama2app for instructions."
        )
        logging.warning(deprecation_notice)

        # The LLM is always a default-constructed Llama2Llm; callers cannot swap it out here.
        llama2_llm = Llama2Llm()
        super().__init__(config=config, llm=llama2_llm, system_prompt=system_prompt)

View File

@@ -1,63 +0,0 @@
import logging
from typing import Optional
from embedchain.apps.app import App
from embedchain.config import CustomAppConfig
from embedchain.embedder.base import BaseEmbedder
from embedchain.helper.json_serializable import register_deserializable
from embedchain.llm.base import BaseLlm
from embedchain.vectordb.base import BaseVectorDB
@register_deserializable
class CustomApp(App):
    """
    Deprecated `App` variant that lets the caller supply the LLM, vector database and embedder.

    Only the constructor is overridden here; every other method comes from `App`.

    .. deprecated:: 0.0.64
        Use `App` instead.
    """

    def __init__(
        self,
        config: Optional[CustomAppConfig] = None,
        llm: BaseLlm = None,
        db: BaseVectorDB = None,
        embedder: BaseEmbedder = None,
        system_prompt: Optional[str] = None,
    ):
        """
        Log a deprecation warning, then forward every argument unchanged to `App`.

        .. deprecated:: 0.0.64
            Use `App` instead.

        :param config: App-level configuration (anything that is not LLM, database or
            embedder specific), defaults to None
        :type config: Optional[CustomAppConfig], optional
        :param llm: LLM instance, e.g. one built from `embedchain.llm.gpt4all.GPT4ALLLlm`,
            defaults to None
        :type llm: BaseLlm
        :param db: Vector database instance, e.g. one built from
            `embedchain.vectordb.chroma.ChromaDB`, defaults to None
        :type db: BaseVectorDB
        :param embedder: Embedder instance, e.g. one built from
            `embedchain.embedder.gpt4all.GPT4AllEmbedder`, defaults to None
        :type embedder: BaseEmbedder
        :param system_prompt: System prompt handed to the LLM, defaults to None
        :type system_prompt: Optional[str], optional

        NOTE(review): earlier documentation listed ``ValueError`` / ``TypeError`` for missing or
        invalid components; no validation happens in this constructor, so any such error would
        have to come from `App.__init__` - confirm there.
        """
        deprecation_notice = (
            "DEPRECATION WARNING: Please use `App` instead of `CustomApp`. "
            "`CustomApp` will be removed in a future release. "
            "Please refer to https://docs.embedchain.ai/advanced/app_types#opensourceapp for instructions."
        )
        logging.warning(deprecation_notice)

        super().__init__(
            config=config,
            llm=llm,
            db=db,
            embedder=embedder,
            system_prompt=system_prompt,
        )

View File

@@ -1,71 +0,0 @@
import logging
from typing import Optional
from embedchain.apps.app import App
from embedchain.config import (BaseLlmConfig, ChromaDbConfig,
OpenSourceAppConfig)
from embedchain.embedder.gpt4all import GPT4AllEmbedder
from embedchain.helper.json_serializable import register_deserializable
from embedchain.llm.gpt4all import GPT4ALLLlm
from embedchain.vectordb.chroma import ChromaDB
# Module-level placeholder; nothing in this module reads or assigns it after this line.
gpt4all_model = None


@register_deserializable
class OpenSourceApp(App):
    """
    Deprecated `App` variant preconfigured with GPT4All (LLM and embedder) and ChromaDB.

    Only the constructor is overridden here; every other method comes from `App`.

    .. deprecated:: 0.0.64
        Use `App` instead.
    """

    def __init__(
        self,
        config: Optional[OpenSourceAppConfig] = None,
        llm_config: Optional[BaseLlmConfig] = None,
        chromadb_config: Optional[ChromaDbConfig] = None,
        system_prompt: Optional[str] = None,
    ):
        """
        Initialize a new `OpenSourceApp` instance.

        The LLM, database and embedder are fixed (GPT4All, ChromaDB, GPT4All embedder);
        only their configuration can be supplied.

        .. deprecated:: 0.0.64
            Use `App` instead.

        :param config: App-level configuration (anything that is not LLM, database or
            embedder specific), defaults to None
        :type config: Optional[OpenSourceAppConfig], optional
        :param llm_config: Configuration passed to `GPT4ALLLlm`, defaults to None
        :type llm_config: Optional[BaseLlmConfig], optional
        :param chromadb_config: Configuration passed to `ChromaDB`, defaults to None
        :type chromadb_config: Optional[ChromaDbConfig], optional
        :param system_prompt: System prompt forwarded to `App`. Earlier documentation
            states it is not supported for this app type, defaults to None
        :type system_prompt: Optional[str], optional

        NOTE(review): earlier documentation listed ``TypeError`` for invalid config objects;
        no type check is performed in this constructor - confirm whether `App` or the
        component classes raise it.
        """
        # Each fragment ends with a space so the concatenated message reads as separate sentences.
        logging.warning(
            "DEPRECATION WARNING: Please use `App` instead of `OpenSourceApp`. "
            "`OpenSourceApp` will be removed in a future release. "
            "Please refer to https://docs.embedchain.ai/advanced/app_types#customapp for instructions."
        )
        super().__init__(
            config=config,
            llm=GPT4ALLLlm(config=llm_config),
            db=ChromaDB(config=chromadb_config),
            embedder=GPT4AllEmbedder(),
            system_prompt=system_prompt,
        )

View File

@@ -1,93 +0,0 @@
from string import Template
from embedchain.apps.app import App
from embedchain.apps.open_source_app import OpenSourceApp
from embedchain.config import AppConfig, BaseLlmConfig
from embedchain.config.llm.base import (DEFAULT_PROMPT,
DEFAULT_PROMPT_WITH_HISTORY)
from embedchain.helper.json_serializable import register_deserializable
@register_deserializable
class EmbedChainPersonApp:
    """
    Base class to create a person bot, i.e. an app that answers in the style of a given person.

    It has no base class of its own and calls ``super().__init__(config)``, so it is meant to be
    combined with an app class through multiple inheritance (as `PersonApp` does with `App`).
    """

    def __init__(self, person: str, config: AppConfig = None):
        """
        Store the person and the derived prompt prefix, then continue initialisation up the MRO.

        :param person: Name of the person that's imitated.
        :type person: str
        :param config: Configuration class instance, defaults to None
        :type config: AppConfig, optional
        """
        self.person = person
        self.person_prompt = f"You are {person}. Whatever you say, you will always say in {person} style."  # noqa:E501
        super().__init__(config)

    def add_person_template_to_config(self, default_prompt: str, config: BaseLlmConfig = None):
        """
        Return a config whose prompt template starts with the person prompt.

        * No config given: a new `BaseLlmConfig` is created from the person prompt followed by
          ``default_prompt``.
        * Config without a template: its template is set to the person prompt followed by
          ``default_prompt``.
        * Config with a template: the person prompt is prepended to that template, unless the
          template already starts with it.

        A supplied config is modified in place and returned. The "already starts with" check
        makes repeated calls with the same config object idempotent instead of stacking the
        person prompt once per call.

        :param default_prompt: Default prompt for the query or chat method
        :type default_prompt: str
        :param config: LLM configuration to extend, defaults to None
        :type config: BaseLlmConfig, optional
        :return: The configuration to use for the LLM call.
        :rtype: BaseLlmConfig
        """
        person_prefix = self.person_prompt + " "

        if not config:
            # No config at all: start from the person prompt plus the default template.
            return BaseLlmConfig(template=Template(person_prefix + default_prompt))

        if not config.template:
            # Config without a user template: person prompt plus the default template.
            config.template = Template(person_prefix + default_prompt)
        elif not config.template.template.startswith(self.person_prompt):
            # Custom user template that has not been personalised yet.
            config.template = Template(person_prefix + config.template.template)

        return config
@register_deserializable
class PersonApp(EmbedChainPersonApp, App):
    """
    The Person app.

    Combines `EmbedChainPersonApp` (person prompt handling) with `App`; `query` and `chat`
    add the person prompt to the config before delegating to the next class in the MRO.
    """

    def query(self, input_query, config: BaseLlmConfig = None, dry_run=False, where=None):
        """
        Answer ``input_query`` using a config personalised with `DEFAULT_PROMPT`.

        :param input_query: The question to answer.
        :param config: LLM configuration to personalise, defaults to None
        :type config: BaseLlmConfig, optional
        :param dry_run: Forwarded unchanged to the parent ``query``, defaults to False
        :param where: Forwarded unchanged to the parent ``query`` (mirrors `chat`), defaults to None
        :return: Whatever the parent ``query`` returns.
        """
        config = self.add_person_template_to_config(DEFAULT_PROMPT, config)
        return super().query(input_query, config, dry_run, where=where)

    def chat(self, input_query, config: BaseLlmConfig = None, dry_run=False, where=None):
        """
        Answer ``input_query`` using a config personalised with `DEFAULT_PROMPT_WITH_HISTORY`.

        :param input_query: The message to answer.
        :param config: LLM configuration to personalise, defaults to None
        :type config: BaseLlmConfig, optional
        :param dry_run: Forwarded unchanged to the parent ``chat``, defaults to False
        :param where: Forwarded unchanged to the parent ``chat``, defaults to None
        :return: Whatever the parent ``chat`` returns.
        """
        config = self.add_person_template_to_config(DEFAULT_PROMPT_WITH_HISTORY, config)
        return super().chat(input_query, config, dry_run, where)
@register_deserializable
class PersonOpenSourceApp(EmbedChainPersonApp, OpenSourceApp):
    """
    The open source Person app.

    Combines `EmbedChainPersonApp` (person prompt handling) with `OpenSourceApp`; `query` and
    `chat` add the person prompt to the config before delegating to the next class in the MRO.
    """

    def query(self, input_query, config: BaseLlmConfig = None, dry_run=False):
        """Personalise ``config`` with `DEFAULT_PROMPT` and delegate to the parent ``query``."""
        personalised_config = self.add_person_template_to_config(DEFAULT_PROMPT, config)
        return super().query(input_query, personalised_config, dry_run)

    def chat(self, input_query, config: BaseLlmConfig = None, dry_run=False):
        """Personalise ``config`` with `DEFAULT_PROMPT_WITH_HISTORY` and delegate to the parent ``chat``."""
        personalised_config = self.add_person_template_to_config(DEFAULT_PROMPT_WITH_HISTORY, config)
        return super().chat(input_query, personalised_config, dry_run)

View File

@@ -2,8 +2,6 @@
from .add_config import AddConfig, ChunkerConfig
from .apps.app_config import AppConfig
from .apps.custom_app_config import CustomAppConfig
from .apps.open_source_app_config import OpenSourceAppConfig
from .base_config import BaseConfig
from .embedder.base import BaseEmbedderConfig
from .embedder.base import BaseEmbedderConfig as EmbedderConfig

View File

@@ -8,7 +8,7 @@ from embedchain.vectordb.base import BaseVectorDB
class BaseAppConfig(BaseConfig, JSONSerializable):
"""
Parent config to initialize an instance of `App`, `OpenSourceApp` or `CustomApp`.
Parent config to initialize an instance of `App`.
"""
def __init__(

View File

@@ -1,46 +0,0 @@
from typing import Optional
from dotenv import load_dotenv
from embedchain.helper.json_serializable import register_deserializable
from embedchain.vectordb.base import BaseVectorDB
from .base_app_config import BaseAppConfig
load_dotenv()
@register_deserializable
class CustomAppConfig(BaseAppConfig):
    """
    App-level configuration for a `CustomApp`.

    Adds no state of its own; every option is handed to `BaseAppConfig`.
    """

    def __init__(
        self,
        log_level: str = "WARNING",
        db: Optional[BaseVectorDB] = None,
        id: Optional[str] = None,
        collect_metrics: Optional[bool] = None,
        collection_name: Optional[str] = None,
    ):
        """
        Forward all options unchanged to `BaseAppConfig`.

        :param log_level: Debug level ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], defaults to "WARNING"
        :type log_level: str, optional
        :param db: A database instance. Earlier documentation recommends setting it on the
            `CustomApp` itself rather than here, defaults to None
        :type db: Optional[BaseVectorDB], optional
        :param id: ID of the app; per earlier documentation, document metadata carries this id,
            defaults to None
        :type id: Optional[str], optional
        :param collect_metrics: Whether to send anonymous telemetry, defaults to None
            (presumably resolved to an effective default by `BaseAppConfig` - verify there)
        :type collect_metrics: Optional[bool], optional
        :param collection_name: Default collection name. Earlier documentation recommends
            `app.db.set_collection_name()` instead, defaults to None
        :type collection_name: Optional[str], optional
        """
        base_options = {
            "log_level": log_level,
            "db": db,
            "id": id,
            "collect_metrics": collect_metrics,
            "collection_name": collection_name,
        }
        super().__init__(**base_options)

View File

@@ -1,40 +0,0 @@
from typing import Optional
from embedchain.helper.json_serializable import register_deserializable
from .base_app_config import BaseAppConfig
@register_deserializable
class OpenSourceAppConfig(BaseAppConfig):
    """
    App-level configuration for an `OpenSourceApp`.

    On top of the `BaseAppConfig` options it stores the model file name in ``self.model``.
    """

    def __init__(
        self,
        log_level: str = "WARNING",
        id: Optional[str] = None,
        collect_metrics: Optional[bool] = None,
        model: str = "orca-mini-3b.ggmlv3.q4_0.bin",
        collection_name: Optional[str] = None,
    ):
        """
        Store the model name, then forward the remaining options to `BaseAppConfig`.

        :param log_level: Debug level ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], defaults to "WARNING"
        :type log_level: str, optional
        :param id: ID of the app; per earlier documentation, document metadata carries this id,
            defaults to None
        :type id: Optional[str], optional
        :param collect_metrics: Whether to send anonymous telemetry, defaults to None
            (presumably resolved to an effective default by `BaseAppConfig` - verify there)
        :type collect_metrics: Optional[bool], optional
        :param model: Model file name. Any falsy value (``None``, ``""``) falls back to
            "orca-mini-3b.ggmlv3.q4_0.bin", defaults to "orca-mini-3b.ggmlv3.q4_0.bin"
        :type model: str, optional
        :param collection_name: Default collection name. Earlier documentation recommends
            `app.db.set_collection_name()` instead, defaults to None
        :type collection_name: Optional[str], optional
        """
        # Set before the base initialiser runs, exactly as the original ordering did.
        if not model:
            model = "orca-mini-3b.ggmlv3.q4_0.bin"
        self.model = model

        super().__init__(
            log_level=log_level,
            id=id,
            collect_metrics=collect_metrics,
            collection_name=collection_name,
        )

View File

@@ -40,7 +40,7 @@ class Llama2Llm(BaseLlm):
def get_llm_model_answer(self, prompt):
# TODO: Move the model and other inputs into config
if self.config.system_prompt:
raise ValueError("Llama2App does not support `system_prompt`")
raise ValueError("Llama2 does not support `system_prompt`")
llm = Replicate(
model=self.config.model,
input={

View File

@@ -3,7 +3,7 @@ import os
import pytest
import yaml
from embedchain import App, CustomApp, Llama2App, OpenSourceApp
from embedchain import App
from embedchain.config import (AddConfig, AppConfig, BaseEmbedderConfig,
BaseLlmConfig, ChromaDbConfig)
from embedchain.embedder.base import BaseEmbedder
@@ -18,49 +18,12 @@ def app():
return App()
@pytest.fixture
def custom_app():
    """Provide a default-constructed `CustomApp` with a dummy OpenAI key in the environment."""
    os.environ.update({"OPENAI_API_KEY": "test_api_key"})
    return CustomApp()
@pytest.fixture
def opensource_app():
    """Provide a default-constructed `OpenSourceApp` with a dummy OpenAI key in the environment."""
    os.environ.update({"OPENAI_API_KEY": "test_api_key"})
    return OpenSourceApp()
@pytest.fixture
def llama2_app():
    """Provide a default-constructed `Llama2App` with dummy OpenAI and Replicate credentials set."""
    dummy_credentials = {
        "OPENAI_API_KEY": "test_api_key",
        "REPLICATE_API_TOKEN": "-",
    }
    os.environ.update(dummy_credentials)
    return Llama2App()
def test_app(app):
    """The default `App` exposes an LLM, a vector database and an embedder of the base types."""
    expected_types = {"llm": BaseLlm, "db": BaseVectorDB, "embedder": BaseEmbedder}
    for attribute, base_class in expected_types.items():
        assert isinstance(getattr(app, attribute), base_class)
def test_custom_app(custom_app):
    """A `CustomApp` exposes an LLM, a vector database and an embedder of the base types."""
    expected_types = {"llm": BaseLlm, "db": BaseVectorDB, "embedder": BaseEmbedder}
    for attribute, base_class in expected_types.items():
        assert isinstance(getattr(custom_app, attribute), base_class)
def test_opensource_app(opensource_app):
    """An `OpenSourceApp` exposes an LLM, a vector database and an embedder of the base types."""
    expected_types = {"llm": BaseLlm, "db": BaseVectorDB, "embedder": BaseEmbedder}
    for attribute, base_class in expected_types.items():
        assert isinstance(getattr(opensource_app, attribute), base_class)
def test_llama2_app(llama2_app):
    """A `Llama2App` exposes an LLM, a vector database and an embedder of the base types."""
    expected_types = {"llm": BaseLlm, "db": BaseVectorDB, "embedder": BaseEmbedder}
    for attribute, base_class in expected_types.items():
        assert isinstance(getattr(llama2_app, attribute), base_class)
class TestConfigForAppComponents:
def test_constructor_config(self):
collection_name = "my-test-collection"

View File

@@ -1,81 +0,0 @@
import pytest
from embedchain.apps.app import App
from embedchain.apps.person_app import PersonApp, PersonOpenSourceApp
from embedchain.config import AppConfig, BaseLlmConfig
from embedchain.config.llm.base import DEFAULT_PROMPT
@pytest.fixture
def person_app():
    """Provide a `PersonApp` imitating "John Doe", built with a default `AppConfig`."""
    return PersonApp("John Doe", AppConfig())
@pytest.fixture
def opensource_person_app():
    """Provide a `PersonOpenSourceApp` imitating "John Doe", built with a default `AppConfig`."""
    return PersonOpenSourceApp("John Doe", AppConfig())
def test_person_app_initialization(person_app):
    """The constructor stores the person, derives the person prompt and keeps an `AppConfig`."""
    expected_person = "John Doe"
    assert person_app.person == expected_person

    prompt_opening = f"You are {person_app.person}"
    assert prompt_opening in person_app.person_prompt

    assert isinstance(person_app.config, AppConfig)
def test_person_app_add_person_template_to_config_with_invalid_template():
    """A default prompt lacking the $context and $query placeholders is rejected with ValueError."""
    john = PersonApp("John Doe")
    prompt_without_placeholders = "Input Prompt"
    with pytest.raises(ValueError):
        john.add_person_template_to_config(prompt_without_placeholders)
def test_person_app_add_person_template_to_config_with_valid_template():
    """Without a config, the resulting template is the person prompt followed by the default prompt."""
    john = PersonApp("John Doe")
    resulting_config = john.add_person_template_to_config(DEFAULT_PROMPT)

    expected_template = f"You are John Doe. Whatever you say, you will always say in John Doe style. {DEFAULT_PROMPT}"
    assert resulting_config.template.template == expected_template
def test_person_app_query(mocker, person_app):
    """`PersonApp.query` returns whatever the patched `App.query` returns."""
    llm_config = BaseLlmConfig()
    mocker.patch.object(App, "query", return_value="Mocked response")

    answer = person_app.query("Hello, how are you?", llm_config)

    assert answer == "Mocked response"
def test_person_app_chat(mocker, person_app):
    """`PersonApp.chat` returns whatever the patched `App.chat` returns."""
    llm_config = BaseLlmConfig()
    mocker.patch.object(App, "chat", return_value="Mocked chat response")

    answer = person_app.chat("Hello, how are you?", llm_config)

    assert answer == "Mocked chat response"
def test_opensource_person_app_query(mocker, opensource_person_app):
    """`PersonOpenSourceApp.query` returns whatever the patched `App.query` returns."""
    llm_config = BaseLlmConfig()
    mocker.patch.object(App, "query", return_value="Mocked response")

    answer = opensource_person_app.query("Hello, how are you?", llm_config)

    assert answer == "Mocked response"
def test_opensource_person_app_chat(mocker, opensource_person_app):
    """`PersonOpenSourceApp.chat` returns whatever the patched `App.chat` returns."""
    llm_config = BaseLlmConfig()
    mocker.patch.object(App, "chat", return_value="Mocked chat response")

    answer = opensource_person_app.chat("Hello, how are you?", llm_config)

    assert answer == "Mocked chat response"

View File

@@ -63,7 +63,6 @@ class TestJsonSerializable(unittest.TestCase):
config = AppConfig(id=random_id, collect_metrics=False)
# config class is set under app.config.
app = App(config=config)
# w/o recursion it would just be <embedchain.config.apps.OpenSourceAppConfig.OpenSourceAppConfig object at x>
s = app.serialize()
new_app: App = App.deserialize(s)
# The id of the new app is the same as the first one.