import logging
from typing import Optional

from embedchain.config import (BaseEmbedderConfig, BaseLlmConfig,
                               ChromaDbConfig, OpenSourceAppConfig)
from embedchain.embedchain import EmbedChain
from embedchain.embedder.gpt4all import GPT4AllEmbedder
from embedchain.helper.json_serializable import register_deserializable
from embedchain.llm.gpt4all import GPT4ALLLlm
from embedchain.vectordb.chroma import ChromaDB

gpt4all_model = None

@register_deserializable
class OpenSourceApp(EmbedChain):
    """
    The embedchain Open Source App.
    Comes preconfigured with an open-source LLM, embedding model, and database.

    Methods:
    add(source, data_type): adds the data from the given source to the vector db.
    query(query): finds the answer to the given query using the vector database and the LLM.
    chat(query): finds the answer to the given query using the vector database and the LLM, with conversation history.
    """
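    # A minimal usage sketch (comment only, not executed): `add`, `query` and
    # `chat` are inherited from the EmbedChain base class, and the top-level
    # `from embedchain import OpenSourceApp` import is assumed to be exported
    # by the package; the URL and questions are illustrative.
    #
    #     from embedchain import OpenSourceApp
    #
    #     app = OpenSourceApp()
    #     app.add("https://example.com/article.html")
    #     print(app.query("What is the article about?"))
    #     print(app.chat("Can you summarize it in one sentence?"))
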
    def __init__(
        self,
        config: OpenSourceAppConfig = None,
        llm_config: BaseLlmConfig = None,
        chromadb_config: Optional[ChromaDbConfig] = None,
        system_prompt: Optional[str] = None,
    ):
        """
        Initialize a new `OpenSourceApp` instance.
        Since it's opinionated, you don't have to choose an LLM, database or embedder.
        However, you can configure each of them.

        :param config: Config for the app instance. This is the most basic configuration,
        one that does not fall into the LLM, database or embedder category, defaults to None
        :type config: OpenSourceAppConfig, optional
        :param llm_config: Allows you to configure the LLM, e.g. how many documents to return,
        example: `from embedchain.config import LlmConfig`, defaults to None
        :type llm_config: BaseLlmConfig, optional
        :param chromadb_config: Allows you to configure the open source database,
        example: `from embedchain.config import ChromaDbConfig`, defaults to None
        :type chromadb_config: Optional[ChromaDbConfig], optional
        :param system_prompt: System prompt that will be passed to the LLM as is.
        Please don't use it for the time being, as it's not yet supported, defaults to None
        :type system_prompt: Optional[str], optional
        :raises TypeError: `OpenSourceAppConfig` or `LlmConfig` invalid.
        """
        logging.info("Loading open source embedding model. This may take some time...")  # noqa:E501
        if not config:
            config = OpenSourceAppConfig()

        if not isinstance(config, OpenSourceAppConfig):
            raise TypeError(
                "OpenSourceApp needs an OpenSourceAppConfig passed to it. "
                "You can import it with `from embedchain.config import OpenSourceAppConfig`"
            )

        # Fall back to the default GPT4All model when no LLM config (or no model) is provided.
        if not llm_config:
            llm_config = BaseLlmConfig(model="orca-mini-3b.ggmlv3.q4_0.bin")
        elif not isinstance(llm_config, BaseLlmConfig):
            raise TypeError(
                "The LlmConfig passed to OpenSourceApp is invalid. "
                "You can import it with `from embedchain.config import LlmConfig`"
            )
        elif not llm_config.model:
            llm_config.model = "orca-mini-3b.ggmlv3.q4_0.bin"

        llm = GPT4ALLLlm(config=llm_config)
        embedder = GPT4AllEmbedder(config=BaseEmbedderConfig(model="all-MiniLM-L6-v2"))
        logging.info("Successfully loaded open source embedding model.")
        database = ChromaDB(config=chromadb_config)

        super().__init__(config, llm=llm, db=database, embedder=embedder, system_prompt=system_prompt)
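
# A minimal configuration sketch (comment only, not executed): `LlmConfig` and
# `ChromaDbConfig` are the import paths suggested in the docstring above, and the
# model filename is the same default that __init__ falls back to.
#
#     from embedchain import OpenSourceApp
#     from embedchain.config import ChromaDbConfig, LlmConfig
#
#     llm_config = LlmConfig(model="orca-mini-3b.ggmlv3.q4_0.bin")
#     app = OpenSourceApp(llm_config=llm_config, chromadb_config=ChromaDbConfig())
#     print(app.query("What do the added documents say about GPT4All?"))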