import logging
from typing import Optional

from embedchain.config import (BaseEmbedderConfig, BaseLlmConfig,
                               ChromaDbConfig, OpenSourceAppConfig)
from embedchain.embedchain import EmbedChain
from embedchain.embedder.gpt4all import GPT4AllEmbedder
from embedchain.helper.json_serializable import register_deserializable
from embedchain.llm.gpt4all import GPT4ALLLlm
from embedchain.vectordb.chroma import ChromaDB

gpt4all_model = None

@register_deserializable
class OpenSourceApp(EmbedChain):
    """
    The embedchain Open Source App.
    Comes preconfigured with an open-source LLM, embedding model, and database.

    Methods:
    add(source, data_type): adds the data from the given source to the vector db.
    query(query): finds the answer to the given query using the vector database and the LLM.
    chat(query): finds the answer to the given query using the vector database and the LLM, with conversation history.
    """
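    # A minimal usage sketch (comment only, not executed): `add`, `query` and
    # `chat` are inherited from the EmbedChain base class, and the top-level
    # `from embedchain import OpenSourceApp` import is assumed to be exported
    # by the package; the URL and questions are illustrative.
    #
    #     from embedchain import OpenSourceApp
    #
    #     app = OpenSourceApp()
    #     app.add("https://example.com/article.html")
    #     print(app.query("What is the article about?"))
    #     print(app.chat("Can you summarize it in one sentence?"))
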
    def __init__(
        self,
        config: OpenSourceAppConfig = None,
        llm_config: BaseLlmConfig = None,
        chromadb_config: Optional[ChromaDbConfig] = None,
        system_prompt: Optional[str] = None,
    ):
        """
        Initialize a new `OpenSourceApp` instance.
        Since it's opinionated, you don't have to choose an LLM, database or embedder.
        However, you can configure each of them.

        :param config: Config for the app instance. This is the most basic configuration,
        one that does not fall into the LLM, database or embedder category, defaults to None
        :type config: OpenSourceAppConfig, optional
        :param llm_config: Allows you to configure the LLM, e.g. how many documents to return,
        example: `from embedchain.config import LlmConfig`, defaults to None
        :type llm_config: BaseLlmConfig, optional
        :param chromadb_config: Allows you to configure the open source database,
        example: `from embedchain.config import ChromaDbConfig`, defaults to None
        :type chromadb_config: Optional[ChromaDbConfig], optional
        :param system_prompt: System prompt that will be passed to the LLM as is.
        Please don't use it for the time being, as it's not yet supported, defaults to None
        :type system_prompt: Optional[str], optional
        :raises TypeError: `OpenSourceAppConfig` or `LlmConfig` invalid.
        """
        logging.info("Loading open source embedding model. This may take some time...")  # noqa:E501
        if not config:
            config = OpenSourceAppConfig()

        if not isinstance(config, OpenSourceAppConfig):
            raise TypeError(
                "OpenSourceApp needs an OpenSourceAppConfig passed to it. "
                "You can import it with `from embedchain.config import OpenSourceAppConfig`"
            )

        # Fall back to the default GPT4All model when no LLM config (or no model) is provided.
        if not llm_config:
            llm_config = BaseLlmConfig(model="orca-mini-3b.ggmlv3.q4_0.bin")
        elif not isinstance(llm_config, BaseLlmConfig):
            raise TypeError(
                "The LlmConfig passed to OpenSourceApp is invalid. "
                "You can import it with `from embedchain.config import LlmConfig`"
            )
        elif not llm_config.model:
            llm_config.model = "orca-mini-3b.ggmlv3.q4_0.bin"

        llm = GPT4ALLLlm(config=llm_config)
        embedder = GPT4AllEmbedder(config=BaseEmbedderConfig(model="all-MiniLM-L6-v2"))
        logging.info("Successfully loaded open source embedding model.")
        database = ChromaDB(config=chromadb_config)

        super().__init__(config, llm=llm, db=database, embedder=embedder, system_prompt=system_prompt)
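
# A minimal configuration sketch (comment only, not executed): `LlmConfig` and
# `ChromaDbConfig` are the import paths suggested in the docstring above, and the
# model filename is the same default that __init__ falls back to.
#
#     from embedchain import OpenSourceApp
#     from embedchain.config import ChromaDbConfig, LlmConfig
#
#     llm_config = LlmConfig(model="orca-mini-3b.ggmlv3.q4_0.bin")
#     app = OpenSourceApp(llm_config=llm_config, chromadb_config=ChromaDbConfig())
#     print(app.query("What do the added documents say about GPT4All?"))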