Remove person_app, open_source app, llama2_app with their configs (#829)

This commit is contained in:
Sidharth Mohanty
2023-10-19 13:43:52 +05:30
committed by GitHub
parent b7870fbd9b
commit b5d80be037
13 changed files with 3 additions and 480 deletions

View File

@@ -3,9 +3,4 @@ import importlib.metadata
__version__ = importlib.metadata.version(__package__ or __name__)
from embedchain.apps.app import App  # noqa: F401
from embedchain.apps.custom_app import CustomApp # noqa: F401
from embedchain.apps.Llama2App import Llama2App # noqa: F401
from embedchain.apps.open_source_app import OpenSourceApp # noqa: F401
from embedchain.apps.person_app import (PersonApp, # noqa: F401
PersonOpenSourceApp)
from embedchain.vectordb.chroma import ChromaDB  # noqa: F401

View File

@@ -1,38 +0,0 @@
import logging
from typing import Optional
from embedchain.apps.app import App
from embedchain.config import CustomAppConfig
from embedchain.helper.json_serializable import register_deserializable
from embedchain.llm.llama2 import Llama2Llm
@register_deserializable
class Llama2App(App):
    """
    The EmbedChain Llama2App class — a deprecated convenience wrapper that
    builds an ``App`` whose LLM is :class:`Llama2Llm`.

    Methods:
    add(source, data_type): adds the data from the given URL to the vector db.
    query(query): finds answer to the given query using vector database and LLM.
    chat(query): finds answer to the given query using vector database and LLM, with conversation history.

    .. deprecated:: 0.0.64
        Use `App` instead.
    """

    def __init__(self, config: CustomAppConfig = None, system_prompt: Optional[str] = None):
        """
        Create the deprecated Llama2 app.

        .. deprecated:: 0.0.64
            Use `App` instead.

        :param config: CustomAppConfig instance to load as configuration. Optional.
        :param system_prompt: System prompt string. Optional.
        """
        deprecation_notice = (
            "DEPRECATION WARNING: Please use `App` instead of `Llama2App`. "
            "`Llama2App` will be removed in a future release. "
            "Please refer to https://docs.embedchain.ai/advanced/app_types#llama2app for instructions."
        )
        logging.warning(deprecation_notice)
        # Delegate everything to App, pinning the LLM to Llama2.
        super().__init__(config=config, llm=Llama2Llm(), system_prompt=system_prompt)

View File

@@ -1,63 +0,0 @@
import logging
from typing import Optional
from embedchain.apps.app import App
from embedchain.config import CustomAppConfig
from embedchain.embedder.base import BaseEmbedder
from embedchain.helper.json_serializable import register_deserializable
from embedchain.llm.base import BaseLlm
from embedchain.vectordb.base import BaseVectorDB
@register_deserializable
class CustomApp(App):
    """
    Embedchain's custom app allows for most flexibility.

    You can craft your own mix of various LLMs, vector databases and embedding model/functions.

    Methods:
    add(source, data_type): adds the data from the given URL to the vector db.
    query(query): finds answer to the given query using vector database and LLM.
    chat(query): finds answer to the given query using vector database and LLM, with conversation history.

    .. deprecated:: 0.0.64
        Use `App` instead.
    """

    def __init__(
        self,
        config: Optional[CustomAppConfig] = None,
        llm: BaseLlm = None,
        db: BaseVectorDB = None,
        embedder: BaseEmbedder = None,
        system_prompt: Optional[str] = None,
    ):
        """
        Initialize a new `CustomApp` instance. You have to choose a LLM, database and embedder.

        .. deprecated:: 0.0.64
            Use `App` instead.

        :param config: Config for the app instance. This is the most basic configuration,
        that does not fall into the LLM, database or embedder category, defaults to None
        :type config: Optional[CustomAppConfig], optional
        :param llm: LLM Class instance. example: `from embedchain.llm.openai import OpenAILlm`, defaults to None
        :type llm: BaseLlm
        :param db: The database to use for storing and retrieving embeddings,
        example: `from embedchain.vectordb.chroma_db import ChromaDb`, defaults to None
        :type db: BaseVectorDB
        :param embedder: The embedder (embedding model and function) use to calculate embeddings.
        example: `from embedchain.embedder.gpt4all_embedder import GPT4AllEmbedder`, defaults to None
        :type embedder: BaseEmbedder
        :param system_prompt: System prompt that will be provided to the LLM as such, defaults to None
        :type system_prompt: Optional[str], optional
        :raises ValueError: LLM, database or embedder has not been defined.
        :raises TypeError: LLM, database or embedder is not a valid class instance.
        """
        # FIX: the deprecation notice previously pointed to the #opensourceapp
        # docs anchor (copy-paste error); CustomApp's migration instructions
        # live under #customapp.
        logging.warning(
            "DEPRECATION WARNING: Please use `App` instead of `CustomApp`. "
            "`CustomApp` will be removed in a future release. "
            "Please refer to https://docs.embedchain.ai/advanced/app_types#customapp for instructions."
        )
        super().__init__(config=config, llm=llm, db=db, embedder=embedder, system_prompt=system_prompt)

View File

@@ -1,71 +0,0 @@
import logging
from typing import Optional
from embedchain.apps.app import App
from embedchain.config import (BaseLlmConfig, ChromaDbConfig,
OpenSourceAppConfig)
from embedchain.embedder.gpt4all import GPT4AllEmbedder
from embedchain.helper.json_serializable import register_deserializable
from embedchain.llm.gpt4all import GPT4ALLLlm
from embedchain.vectordb.chroma import ChromaDB
gpt4all_model = None
@register_deserializable
class OpenSourceApp(App):
    """
    The embedchain Open Source App.
    Comes preconfigured with the best open source LLM, embedding model, database.

    Methods:
    add(source, data_type): adds the data from the given URL to the vector db.
    query(query): finds answer to the given query using vector database and LLM.
    chat(query): finds answer to the given query using vector database and LLM, with conversation history.

    .. deprecated:: 0.0.64
        Use `App` instead.
    """

    def __init__(
        self,
        config: OpenSourceAppConfig = None,
        llm_config: BaseLlmConfig = None,
        chromadb_config: Optional[ChromaDbConfig] = None,
        system_prompt: Optional[str] = None,
    ):
        """
        Initialize a new `OpenSourceApp` instance.
        Since it's opinionated you don't have to choose a LLM, database and embedder.
        However, you can configure those.

        .. deprecated:: 0.0.64
            Use `App` instead.

        :param config: Config for the app instance. This is the most basic configuration,
        that does not fall into the LLM, database or embedder category, defaults to None
        :type config: OpenSourceAppConfig, optional
        :param llm_config: Allows you to configure the LLM, e.g. how many documents to return.
        example: `from embedchain.config import BaseLlmConfig`, defaults to None
        :type llm_config: BaseLlmConfig, optional
        :param chromadb_config: Allows you to configure the open source database,
        example: `from embedchain.config import ChromaDbConfig`, defaults to None
        :type chromadb_config: Optional[ChromaDbConfig], optional
        :param system_prompt: System prompt that will be provided to the LLM as such.
        Please don't use for the time being, as it's not supported., defaults to None
        :type system_prompt: Optional[str], optional
        :raises TypeError: `OpenSourceAppConfig` or `BaseLlmConfig` invalid.
        """
        # FIX: the warning previously ran its sentences together (the
        # concatenated fragments had no separating spaces) and linked to the
        # #customapp docs anchor instead of #opensourceapp.
        logging.warning(
            "DEPRECATION WARNING: Please use `App` instead of `OpenSourceApp`. "
            "`OpenSourceApp` will be removed in a future release. "
            "Please refer to https://docs.embedchain.ai/advanced/app_types#opensourceapp for instructions."
        )
        super().__init__(
            config=config,
            llm=GPT4ALLLlm(config=llm_config),
            db=ChromaDB(config=chromadb_config),
            embedder=GPT4AllEmbedder(),
            system_prompt=system_prompt,
        )

View File

@@ -1,93 +0,0 @@
from string import Template
from embedchain.apps.app import App
from embedchain.apps.open_source_app import OpenSourceApp
from embedchain.config import AppConfig, BaseLlmConfig
from embedchain.config.llm.base import (DEFAULT_PROMPT,
DEFAULT_PROMPT_WITH_HISTORY)
from embedchain.helper.json_serializable import register_deserializable
@register_deserializable
class EmbedChainPersonApp:
    """
    Base class to create a person bot.
    This bot behaves and speaks like a person.

    :param person: name of the person, better if its a well known person.
    :param config: AppConfig instance to load as configuration.
    """

    def __init__(self, person: str, config: AppConfig = None):
        """Initialize a new person app.

        :param person: Name of the person that's imitated.
        :type person: str
        :param config: Configuration class instance, defaults to None
        :type config: AppConfig, optional
        """
        self.person = person
        self.person_prompt = f"You are {person}. Whatever you say, you will always say in {person} style."  # noqa:E501
        super().__init__(config)

    def add_person_template_to_config(self, default_prompt: str, config: BaseLlmConfig = None):
        """
        Produce a config whose prompt template is prefixed with the person prompt.

        If ``config`` carries a template already, that template is prefixed;
        if it carries none, the person-prefixed default is installed; with no
        config at all, a fresh ``BaseLlmConfig`` is built around the
        person-prefixed default.

        :param default_prompt: it is the default prompt for query or chat methods
        :type default_prompt: str
        :param config: _description_, defaults to None
        :type config: BaseLlmConfig, optional
        :return: The `ChatConfig` instance to use as configuration options.
        :rtype: _type_
        """
        combined = Template(f"{self.person_prompt} {default_prompt}")
        if config:
            user_template = config.template
            if user_template:
                # Prefix the person prompt onto the caller's custom template.
                config.template = Template(f"{self.person_prompt} {user_template.template}")
            else:
                # No user template present: person prompt + default template.
                config.template = combined
        else:
            # No config at all: build one around the combined template.
            config = BaseLlmConfig(template=combined)
        return config
@register_deserializable
class PersonApp(EmbedChainPersonApp, App):
    """
    The Person app.
    Extends functionality from EmbedChainPersonApp and App
    """

    def query(self, input_query, config: BaseLlmConfig = None, dry_run=False):
        """Answer ``input_query`` while speaking in the configured person's style."""
        person_config = self.add_person_template_to_config(DEFAULT_PROMPT, config)
        return super().query(input_query, person_config, dry_run, where=None)

    def chat(self, input_query, config: BaseLlmConfig = None, dry_run=False, where=None):
        """Chat (with conversation history) in the configured person's style."""
        person_config = self.add_person_template_to_config(DEFAULT_PROMPT_WITH_HISTORY, config)
        return super().chat(input_query, person_config, dry_run, where)
@register_deserializable
class PersonOpenSourceApp(EmbedChainPersonApp, OpenSourceApp):
    """
    The Person app.
    Extends functionality from EmbedChainPersonApp and OpenSourceApp
    """

    def query(self, input_query, config: BaseLlmConfig = None, dry_run=False):
        """Answer ``input_query`` while speaking in the configured person's style."""
        person_config = self.add_person_template_to_config(DEFAULT_PROMPT, config)
        return super().query(input_query, person_config, dry_run)

    def chat(self, input_query, config: BaseLlmConfig = None, dry_run=False):
        """Chat (with conversation history) in the configured person's style."""
        person_config = self.add_person_template_to_config(DEFAULT_PROMPT_WITH_HISTORY, config)
        return super().chat(input_query, person_config, dry_run)

View File

@@ -2,8 +2,6 @@
from .add_config import AddConfig, ChunkerConfig from .add_config import AddConfig, ChunkerConfig
from .apps.app_config import AppConfig from .apps.app_config import AppConfig
from .apps.custom_app_config import CustomAppConfig
from .apps.open_source_app_config import OpenSourceAppConfig
from .base_config import BaseConfig from .base_config import BaseConfig
from .embedder.base import BaseEmbedderConfig from .embedder.base import BaseEmbedderConfig
from .embedder.base import BaseEmbedderConfig as EmbedderConfig from .embedder.base import BaseEmbedderConfig as EmbedderConfig

View File

@@ -8,7 +8,7 @@ from embedchain.vectordb.base import BaseVectorDB
class BaseAppConfig(BaseConfig, JSONSerializable): class BaseAppConfig(BaseConfig, JSONSerializable):
""" """
Parent config to initialize an instance of `App`, `OpenSourceApp` or `CustomApp`. Parent config to initialize an instance of `App`.
""" """
def __init__( def __init__(

View File

@@ -1,46 +0,0 @@
from typing import Optional
from dotenv import load_dotenv
from embedchain.helper.json_serializable import register_deserializable
from embedchain.vectordb.base import BaseVectorDB
from .base_app_config import BaseAppConfig
load_dotenv()
@register_deserializable
class CustomAppConfig(BaseAppConfig):
    """
    Config to initialize an embedchain custom `App` instance, with extra config options.
    """

    def __init__(
        self,
        log_level: str = "WARNING",
        db: Optional[BaseVectorDB] = None,
        id: Optional[str] = None,
        collect_metrics: Optional[bool] = None,
        collection_name: Optional[str] = None,
    ):
        """
        Initializes a configuration class instance for an Custom App.
        Most of the configuration is done in the `CustomApp` class itself.

        :param log_level: Debug level ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], defaults to "WARNING"
        :type log_level: str, optional
        :param db: A database class. It is recommended to set this directly in the `CustomApp` class, not this config,
        defaults to None
        :type db: Optional[BaseVectorDB], optional
        :param id: ID of the app. Document metadata will have this id., defaults to None
        :type id: Optional[str], optional
        :param collect_metrics: Send anonymous telemetry to improve embedchain, defaults to True
        :type collect_metrics: Optional[bool], optional
        :param collection_name: Default collection name. It's recommended to use app.db.set_collection_name() instead,
        defaults to None
        :type collection_name: Optional[str], optional
        """
        # Everything is handled by the shared BaseAppConfig initializer.
        super().__init__(
            log_level=log_level,
            db=db,
            id=id,
            collect_metrics=collect_metrics,
            collection_name=collection_name,
        )

View File

@@ -1,40 +0,0 @@
from typing import Optional
from embedchain.helper.json_serializable import register_deserializable
from .base_app_config import BaseAppConfig
@register_deserializable
class OpenSourceAppConfig(BaseAppConfig):
    """
    Config to initialize an embedchain custom `OpenSourceApp` instance, with extra config options.
    """

    def __init__(
        self,
        log_level: str = "WARNING",
        id: Optional[str] = None,
        collect_metrics: Optional[bool] = None,
        model: str = "orca-mini-3b.ggmlv3.q4_0.bin",
        collection_name: Optional[str] = None,
    ):
        """
        Initializes a configuration class instance for an Open Source App.

        :param log_level: Debug level ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], defaults to "WARNING"
        :type log_level: str, optional
        :param id: ID of the app. Document metadata will have this id., defaults to None
        :type id: Optional[str], optional
        :param collect_metrics: Send anonymous telemetry to improve embedchain, defaults to True
        :type collect_metrics: Optional[bool], optional
        :param model: GPT4ALL uses the model to instantiate the class.
        Unlike `App`, it has to be provided before querying, defaults to "orca-mini-3b.ggmlv3.q4_0.bin"
        :type model: str, optional
        :param collection_name: Default collection name. It's recommended to use app.db.set_collection_name() instead,
        defaults to None
        :type collection_name: Optional[str], optional
        """
        # Guard against an explicitly falsy model (e.g. model=None): fall back
        # to the default GPT4All weights file.
        self.model = model if model else "orca-mini-3b.ggmlv3.q4_0.bin"
        super().__init__(
            log_level=log_level,
            id=id,
            collect_metrics=collect_metrics,
            collection_name=collection_name,
        )

View File

@@ -40,7 +40,7 @@ class Llama2Llm(BaseLlm):
def get_llm_model_answer(self, prompt): def get_llm_model_answer(self, prompt):
# TODO: Move the model and other inputs into config # TODO: Move the model and other inputs into config
if self.config.system_prompt: if self.config.system_prompt:
raise ValueError("Llama2App does not support `system_prompt`") raise ValueError("Llama2 does not support `system_prompt`")
llm = Replicate( llm = Replicate(
model=self.config.model, model=self.config.model,
input={ input={

View File

@@ -3,7 +3,7 @@ import os
import pytest import pytest
import yaml import yaml
from embedchain import App
from embedchain.config import (AddConfig, AppConfig, BaseEmbedderConfig, from embedchain.config import (AddConfig, AppConfig, BaseEmbedderConfig,
BaseLlmConfig, ChromaDbConfig) BaseLlmConfig, ChromaDbConfig)
from embedchain.embedder.base import BaseEmbedder from embedchain.embedder.base import BaseEmbedder
@@ -18,49 +18,12 @@ def app():
return App() return App()
@pytest.fixture
def custom_app():
    # The key only needs to be present; no real OpenAI request is issued.
    os.environ["OPENAI_API_KEY"] = "test_api_key"
    return CustomApp()


@pytest.fixture
def opensource_app():
    os.environ["OPENAI_API_KEY"] = "test_api_key"
    return OpenSourceApp()


@pytest.fixture
def llama2_app():
    # Llama2 is served via Replicate, so a (dummy) token must be set as well.
    os.environ["OPENAI_API_KEY"] = "test_api_key"
    os.environ["REPLICATE_API_TOKEN"] = "-"
    return Llama2App()
def test_app(app): def test_app(app):
assert isinstance(app.llm, BaseLlm) assert isinstance(app.llm, BaseLlm)
assert isinstance(app.db, BaseVectorDB) assert isinstance(app.db, BaseVectorDB)
assert isinstance(app.embedder, BaseEmbedder) assert isinstance(app.embedder, BaseEmbedder)
def test_custom_app(custom_app):
    # Every app flavour must wire up the three core components.
    for component, base in (("llm", BaseLlm), ("db", BaseVectorDB), ("embedder", BaseEmbedder)):
        assert isinstance(getattr(custom_app, component), base)


def test_opensource_app(opensource_app):
    for component, base in (("llm", BaseLlm), ("db", BaseVectorDB), ("embedder", BaseEmbedder)):
        assert isinstance(getattr(opensource_app, component), base)


def test_llama2_app(llama2_app):
    for component, base in (("llm", BaseLlm), ("db", BaseVectorDB), ("embedder", BaseEmbedder)):
        assert isinstance(getattr(llama2_app, component), base)
class TestConfigForAppComponents: class TestConfigForAppComponents:
def test_constructor_config(self): def test_constructor_config(self):
collection_name = "my-test-collection" collection_name = "my-test-collection"

View File

@@ -1,81 +0,0 @@
import pytest
from embedchain.apps.app import App
from embedchain.apps.person_app import PersonApp, PersonOpenSourceApp
from embedchain.config import AppConfig, BaseLlmConfig
from embedchain.config.llm.base import DEFAULT_PROMPT
@pytest.fixture
def person_app():
    # A bot imitating a fixed, well-known name keeps the assertions simple.
    return PersonApp("John Doe", AppConfig())


@pytest.fixture
def opensource_person_app():
    return PersonOpenSourceApp("John Doe", AppConfig())
def test_person_app_initialization(person_app):
    # The fixture's name must flow into both the attribute and the prompt.
    assert person_app.person == "John Doe"
    assert f"You are {person_app.person}" in person_app.person_prompt
    assert isinstance(person_app.config, AppConfig)


def test_person_app_add_person_template_to_config_with_invalid_template():
    # "Input Prompt" lacks the required $context and $query placeholders.
    with pytest.raises(ValueError):
        PersonApp("John Doe").add_person_template_to_config("Input Prompt")


def test_person_app_add_person_template_to_config_with_valid_template():
    produced = PersonApp("John Doe").add_person_template_to_config(DEFAULT_PROMPT)
    expected = f"You are John Doe. Whatever you say, you will always say in John Doe style. {DEFAULT_PROMPT}"
    assert produced.template.template == expected
def test_person_app_query(mocker, person_app):
    # Patch App.query so no LLM/vector-db work actually happens.
    mocker.patch.object(App, "query", return_value="Mocked response")
    assert person_app.query("Hello, how are you?", BaseLlmConfig()) == "Mocked response"


def test_person_app_chat(mocker, person_app):
    mocker.patch.object(App, "chat", return_value="Mocked chat response")
    assert person_app.chat("Hello, how are you?", BaseLlmConfig()) == "Mocked chat response"


def test_opensource_person_app_query(mocker, opensource_person_app):
    mocker.patch.object(App, "query", return_value="Mocked response")
    assert opensource_person_app.query("Hello, how are you?", BaseLlmConfig()) == "Mocked response"


def test_opensource_person_app_chat(mocker, opensource_person_app):
    mocker.patch.object(App, "chat", return_value="Mocked chat response")
    assert opensource_person_app.chat("Hello, how are you?", BaseLlmConfig()) == "Mocked chat response"

View File

@@ -63,7 +63,6 @@ class TestJsonSerializable(unittest.TestCase):
config = AppConfig(id=random_id, collect_metrics=False) config = AppConfig(id=random_id, collect_metrics=False)
# config class is set under app.config. # config class is set under app.config.
app = App(config=config) app = App(config=config)
# w/o recursion it would just be <embedchain.config.apps.OpenSourceAppConfig.OpenSourceAppConfig object at x>
s = app.serialize() s = app.serialize()
new_app: App = App.deserialize(s) new_app: App = App.deserialize(s)
# The id of the new app is the same as the first one. # The id of the new app is the same as the first one.