[docs]: Revamp embedchain docs (#799)

Author: Deshraj Yadav
Date: 2023-10-13 15:38:15 -07:00
Committed by: GitHub
Parent: a86d7f52e9
Commit: 4a8c50f886
68 changed files with 1175 additions and 673 deletions

View File

@@ -46,7 +46,7 @@ class App(EmbedChain):
         :param llm: LLM Class instance. example: `from embedchain.llm.openai import OpenAILlm`, defaults to OpenAiLlm
         :type llm: BaseLlm, optional
         :param llm_config: Allows you to configure the LLM, e.g. how many documents to return,
-        example: `from embedchain.config import LlmConfig`, defaults to None
+        example: `from embedchain.config import BaseLlmConfig`, defaults to None
         :type llm_config: Optional[BaseLlmConfig], optional
         :param db: The database to use for storing and retrieving embeddings,
         example: `from embedchain.vectordb.chroma_db import ChromaDb`, defaults to ChromaDb
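
Migration note: a minimal sketch of the renamed config class in use (the option values are illustrative; assumes an OpenAI key is set in the environment):

from embedchain import App
from embedchain.config import BaseLlmConfig

# BaseLlmConfig replaces the former LlmConfig alias; number_documents
# controls how many context documents are pulled from the vector DB.
llm_config = BaseLlmConfig(number_documents=3, temperature=0.5)
app = App(llm_config=llm_config)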

View File

@@ -46,7 +46,7 @@ class OpenSourceApp(App):
         that does not fall into the LLM, database or embedder category, defaults to None
         :type config: OpenSourceAppConfig, optional
         :param llm_config: Allows you to configure the LLM, e.g. how many documents to return.
-        example: `from embedchain.config import LlmConfig`, defaults to None
+        example: `from embedchain.config import BaseLlmConfig`, defaults to None
         :type llm_config: BaseLlmConfig, optional
         :param chromadb_config: Allows you to configure the open source database,
         example: `from embedchain.config import ChromaDbConfig`, defaults to None
@@ -54,7 +54,7 @@ class OpenSourceApp(App):
         :param system_prompt: System prompt that will be provided to the LLM as such.
         Please don't use for the time being, as it's not supported., defaults to None
         :type system_prompt: Optional[str], optional
-        :raises TypeError: `OpenSourceAppConfig` or `LlmConfig` invalid.
+        :raises TypeError: `OpenSourceAppConfig` or `BaseLlmConfig` invalid.
         """
         logging.warning(
             "DEPRECATION WARNING: Please use `App` instead of `OpenSourceApp`."

View File

@@ -4,8 +4,8 @@ from embedchain.apps.app import App
 from embedchain.apps.open_source_app import OpenSourceApp
 from embedchain.config import BaseLlmConfig
 from embedchain.config.apps.base_app_config import BaseAppConfig
-from embedchain.config.llm.base_llm_config import (DEFAULT_PROMPT,
-                                                   DEFAULT_PROMPT_WITH_HISTORY)
+from embedchain.config.llm.base import (DEFAULT_PROMPT,
+                                        DEFAULT_PROMPT_WITH_HISTORY)
 from embedchain.helper.json_serializable import register_deserializable

View File

@@ -1,7 +1,7 @@
 from typing import Any

 from embedchain import App
-from embedchain.config import AddConfig, AppConfig, LlmConfig
+from embedchain.config import AddConfig, AppConfig, BaseLlmConfig
 from embedchain.embedder.openai import OpenAIEmbedder
 from embedchain.helper.json_serializable import (JSONSerializable,
                                                  register_deserializable)
@@ -27,14 +27,14 @@ class BaseBot(JSONSerializable):
         config = config if config else AddConfig()
         self.app.add(data, config=config)

-    def query(self, query: str, config: LlmConfig = None) -> str:
+    def query(self, query: str, config: BaseLlmConfig = None) -> str:
         """
         Query the bot

         :param query: the user query
         :type query: str
         :param config: configuration class instance, defaults to None
-        :type config: LlmConfig, optional
+        :type config: BaseLlmConfig, optional
         :return: Answer
         :rtype: str
         """

View File

@@ -7,8 +7,7 @@ from .apps.open_source_app_config import OpenSourceAppConfig
 from .base_config import BaseConfig
 from .embedder.base import BaseEmbedderConfig
 from .embedder.base import BaseEmbedderConfig as EmbedderConfig
-from .llm.base_llm_config import BaseLlmConfig
-from .llm.base_llm_config import BaseLlmConfig as LlmConfig
+from .llm.base import BaseLlmConfig
 from .vectordb.chroma import ChromaDbConfig
 from .vectordb.elasticsearch import ElasticsearchDBConfig
 from .vectordb.opensearch import OpenSearchDBConfig
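
Migration note: the LlmConfig alias is gone from the package root, so imports need updating:

# Before (no longer exported):
#   from embedchain.config import LlmConfig
# After:
from embedchain.config import BaseLlmConfig

config = BaseLlmConfig()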

View File

@@ -73,7 +73,6 @@ class BaseLlmConfig(BaseConfig):
         Initializes a configuration class instance for the LLM.

         Takes the place of the former `QueryConfig` or `ChatConfig`.
-        Use `LlmConfig` as an alias to `BaseLlmConfig`.

         :param number_documents: Number of documents to pull from the database as
         context, defaults to 1
@@ -115,6 +114,9 @@ class BaseLlmConfig(BaseConfig):
         self.system_prompt = system_prompt
         self.query_type = query_type

+        if type(template) is str:
+            template = Template(template)
+
         if self.validate_template(template):
             self.template = template
         else:
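
Usage note: the new branch coerces a plain string into a string.Template, so callers no longer wrap it themselves. A sketch (the $context/$query placeholders follow the template syntax the validator expects; the wording is illustrative):

from embedchain.config import BaseLlmConfig

# A plain string now works; previously this required Template("...").
config = BaseLlmConfig(
    template="Use the context to answer.\n$context\nQuery: $query\nAnswer:"
)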

View File

@@ -470,7 +470,7 @@ class EmbedChain(JSONSerializable):
         :param input_query: The query to use.
         :type input_query: str
-        :param config: The `LlmConfig` instance to use as configuration options. This is used for one method call.
+        :param config: The `BaseLlmConfig` instance to use as configuration options. This is used for one method call.
         To persistently use a config, declare it during app init., defaults to None
         :type config: Optional[BaseLlmConfig], optional
         :param dry_run: A dry run does everything except send the resulting prompt to
@@ -506,7 +506,7 @@ class EmbedChain(JSONSerializable):
         :param input_query: The query to use.
         :type input_query: str
-        :param config: The `LlmConfig` instance to use as configuration options. This is used for one method call.
+        :param config: The `BaseLlmConfig` instance to use as configuration options. This is used for one method call.
         To persistently use a config, declare it during app init., defaults to None
         :type config: Optional[BaseLlmConfig], optional
         :param dry_run: A dry run does everything except send the resulting prompt to
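
Usage note: a sketch of the per-call config override and of dry_run, both documented above (the data source is hypothetical):

from embedchain import App
from embedchain.config import BaseLlmConfig

app = App()
app.add("https://example.com/handbook")  # hypothetical source

# One-off override; the app-level config is untouched afterwards.
answer = app.query("Summarize the handbook.",
                   config=BaseLlmConfig(number_documents=5))

# dry_run builds the final prompt without sending it to the LLM.
prompt = app.query("Summarize the handbook.", dry_run=True)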

View File

@@ -7,7 +7,7 @@ from embedchain.embedder.base import BaseEmbedder
 from embedchain.models import VectorDimensions


-class VertexAiEmbedder(BaseEmbedder):
+class VertexAIEmbedder(BaseEmbedder):
     def __init__(self, config: Optional[BaseEmbedderConfig] = None):
         super().__init__(config=config)

View File

@@ -13,16 +13,16 @@ class LlmFactory:
         "azure_openai": "embedchain.llm.azure_openai.AzureOpenAILlm",
         "cohere": "embedchain.llm.cohere.CohereLlm",
         "gpt4all": "embedchain.llm.gpt4all.GPT4ALLLlm",
-        "hugging_face_llm": "embedchain.llm.hugging_face_llm.HuggingFaceLlm",
+        "huggingface": "embedchain.llm.huggingface.HuggingFaceLlm",
         "jina": "embedchain.llm.jina.JinaLlm",
         "llama2": "embedchain.llm.llama2.Llama2Llm",
         "openai": "embedchain.llm.openai.OpenAILlm",
         "vertexai": "embedchain.llm.vertex_ai.VertexAILlm",
     }
     provider_to_config_class = {
-        "embedchain": "embedchain.config.llm.base_llm_config.BaseLlmConfig",
-        "openai": "embedchain.config.llm.base_llm_config.BaseLlmConfig",
-        "anthropic": "embedchain.config.llm.base_llm_config.BaseLlmConfig",
+        "embedchain": "embedchain.config.llm.base.BaseLlmConfig",
+        "openai": "embedchain.config.llm.base.BaseLlmConfig",
+        "anthropic": "embedchain.config.llm.base.BaseLlmConfig",
     }

     @classmethod
@@ -43,7 +43,7 @@ class EmbedderFactory:
     provider_to_class = {
         "gpt4all": "embedchain.embedder.gpt4all.GPT4AllEmbedder",
         "huggingface": "embedchain.embedder.huggingface.HuggingFaceEmbedder",
-        "vertexai": "embedchain.embedder.vertexai.VertexAiEmbedder",
+        "vertexai": "embedchain.embedder.vertexai.VertexAIEmbedder",
         "openai": "embedchain.embedder.openai.OpenAIEmbedder",
     }
     provider_to_config_class = {
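
Background note: the factories map a provider key to a dotted class path. A generic sketch of how such a path resolves (illustrative helper, not the library's actual factory method):

import importlib


def load_class(dotted_path: str):
    # "pkg.module.ClassName" -> import pkg.module, return ClassName.
    module_path, class_name = dotted_path.rsplit(".", 1)
    return getattr(importlib.import_module(module_path), class_name)


# e.g. the renamed provider entry above:
HuggingFaceLlm = load_class("embedchain.llm.huggingface.HuggingFaceLlm")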

View File

@@ -5,9 +5,9 @@ from langchain.memory import ConversationBufferMemory
 from langchain.schema import BaseMessage

 from embedchain.config import BaseLlmConfig
-from embedchain.config.llm.base_llm_config import (
-    DEFAULT_PROMPT, DEFAULT_PROMPT_WITH_HISTORY_TEMPLATE,
-    DOCS_SITE_PROMPT_TEMPLATE)
+from embedchain.config.llm.base import (DEFAULT_PROMPT,
+                                        DEFAULT_PROMPT_WITH_HISTORY_TEMPLATE,
+                                        DOCS_SITE_PROMPT_TEMPLATE)
 from embedchain.helper.json_serializable import JSONSerializable
@@ -174,7 +174,7 @@ class BaseLlm(JSONSerializable):
         :type input_query: str
         :param contexts: Embeddings retrieved from the database to be used as context.
         :type contexts: List[str]
-        :param config: The `LlmConfig` instance to use as configuration options. This is used for one method call.
+        :param config: The `BaseLlmConfig` instance to use as configuration options. This is used for one method call.
         To persistently use a config, declare it during app init., defaults to None
         :type config: Optional[BaseLlmConfig], optional
         :param dry_run: A dry run does everything except send the resulting prompt to
@@ -230,7 +230,7 @@ class BaseLlm(JSONSerializable):
         :type input_query: str
         :param contexts: Embeddings retrieved from the database to be used as context.
         :type contexts: List[str]
-        :param config: The `LlmConfig` instance to use as configuration options. This is used for one method call.
+        :param config: The `BaseLlmConfig` instance to use as configuration options. This is used for one method call.
         To persistently use a config, declare it during app init., defaults to None
         :type config: Optional[BaseLlmConfig], optional
         :param dry_run: A dry run does everything except send the resulting prompt to
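
Background note: DEFAULT_PROMPT is a plain string with string.Template placeholders. A sketch of rendering a prompt from retrieved contexts (the $context/$query placeholder names are assumed from the default prompt; the joining scheme is illustrative):

from string import Template

from embedchain.config.llm.base import DEFAULT_PROMPT

prompt = Template(DEFAULT_PROMPT).substitute(
    context="\n".join(["first retrieved chunk", "second retrieved chunk"]),
    query="What does the first chunk say?",
)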

View File

@@ -30,11 +30,11 @@ class GPT4ALLLlm(BaseLlm):
     def _get_answer(self, prompt: str, config: BaseLlmConfig) -> Union[str, Iterable]:
         if config.model and config.model != self.config.model:
             raise RuntimeError(
-                "OpenSourceApp does not support switching models at runtime. Please create a new app instance."
+                "GPT4ALLLlm does not support switching models at runtime. Please create a new app instance."
             )
         if config.system_prompt:
-            raise ValueError("OpenSourceApp does not support `system_prompt`")
+            raise ValueError("GPT4ALLLlm does not support `system_prompt`")

         response = self.instance.generate(
             prompt=prompt,
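
Usage note: the corrected messages name the class that actually enforces the constraint: the model is fixed at construction time. A sketch (the model name mirrors the open-source preset below):

from embedchain.config import BaseLlmConfig
from embedchain.llm.gpt4all import GPT4ALLLlm

llm = GPT4ALLLlm(config=BaseLlmConfig(model="orca-mini-3b.ggmlv3.q4_0.bin"))
# Passing a config with a different model later raises RuntimeError;
# construct a new GPT4ALLLlm (or app) instead.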

View File

@@ -10,10 +10,10 @@ from embedchain.llm.base import BaseLlm

 @register_deserializable
-class HuggingFaceHubLlm(BaseLlm):
+class HuggingFaceLlm(BaseLlm):
     def __init__(self, config: Optional[BaseLlmConfig] = None):
-        if "HUGGINGFACEHUB_ACCESS_TOKEN" not in os.environ:
-            raise ValueError("Please set the HUGGINGFACEHUB_ACCESS_TOKEN environment variable.")
+        if "HUGGINGFACE_ACCESS_TOKEN" not in os.environ:
+            raise ValueError("Please set the HUGGINGFACE_ACCESS_TOKEN environment variable.")

         try:
             importlib.import_module("huggingface_hub")
@@ -27,8 +27,8 @@ class HuggingFaceHubLlm(BaseLlm):
     def get_llm_model_answer(self, prompt):
         if self.config.system_prompt:
-            raise ValueError("HuggingFaceHubLlm does not support `system_prompt`")
-        return HuggingFaceHubLlm._get_answer(prompt=prompt, config=self.config)
+            raise ValueError("HuggingFaceLlm does not support `system_prompt`")
+        return HuggingFaceLlm._get_answer(prompt=prompt, config=self.config)

     @staticmethod
     def _get_answer(prompt: str, config: BaseLlmConfig) -> str:
@@ -43,7 +43,7 @@ class HuggingFaceHubLlm(BaseLlm):
             raise ValueError("`top_p` must be > 0.0 and < 1.0")

         llm = HuggingFaceHub(
-            huggingfacehub_api_token=os.environ["HUGGINGFACEHUB_ACCESS_TOKEN"],
+            huggingfacehub_api_token=os.environ["HUGGINGFACE_ACCESS_TOKEN"],
             repo_id=config.model or "google/flan-t5-xxl",
             model_kwargs=model_kwargs,
         )
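
Migration note: the environment variable was renamed, so existing setups need the new name (the token value is a placeholder):

import os

from embedchain.llm.huggingface import HuggingFaceLlm

# Renamed: HUGGINGFACEHUB_ACCESS_TOKEN -> HUGGINGFACE_ACCESS_TOKEN.
os.environ["HUGGINGFACE_ACCESS_TOKEN"] = "hf_..."  # placeholder token
llm = HuggingFaceLlm()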

View File

@@ -7,12 +7,12 @@ from embedchain.llm.base import BaseLlm

 @register_deserializable
-class VertexAiLlm(BaseLlm):
+class VertexAILlm(BaseLlm):
     def __init__(self, config: Optional[BaseLlmConfig] = None):
         super().__init__(config=config)

     def get_llm_model_answer(self, prompt):
-        return VertexAiLlm._get_answer(prompt=prompt, config=self.config)
+        return VertexAILlm._get_answer(prompt=prompt, config=self.config)

     @staticmethod
     def _get_answer(prompt: str, config: BaseLlmConfig) -> str:
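
Migration note: both Vertex AI classes now use the "AI" capitalization; import paths per the factory mappings above:

from embedchain.embedder.vertexai import VertexAIEmbedder
from embedchain.llm.vertex_ai import VertexAILlm

llm = VertexAILlm()            # was VertexAiLlm
embedder = VertexAIEmbedder()  # was VertexAiEmbedder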

View File

@@ -1,26 +0,0 @@
-app:
-  config:
-    id: 'my-app'
-    collection_name: 'my-app'
-
-llm:
-  provider: openai
-  model: 'gpt-3.5-turbo'
-  config:
-    temperature: 0.5
-    max_tokens: 1000
-    top_p: 1
-    stream: false
-
-vectordb:
-  provider: chroma
-  config:
-    collection_name: 'my-app'
-    dir: db
-    allow_reset: true
-
-embedder:
-  provider: openai
-  config:
-    model: 'text-embedding-ada-002'
-    deployment_name: null

View File

@@ -1,33 +0,0 @@
-app:
-  config:
-    id: 'my-app'
-    log_level: 'WARN'
-    collect_metrics: true
-    collection_name: 'my-app'
-
-llm:
-  provider: openai
-  model: 'gpt-3.5-turbo'
-  config:
-    temperature: 0.5
-    max_tokens: 1000
-    top_p: 1
-    stream: false
-
-vectordb:
-  provider: opensearch
-  config:
-    opensearch_url: 'https://localhost:9200'
-    http_auth:
-      - admin
-      - admin
-    vector_dimension: 1536
-    collection_name: 'my-app'
-    use_ssl: false
-    verify_certs: false
-
-embedder:
-  provider: openai
-  config:
-    model: 'text-embedding-ada-002'
-    deployment_name: null

View File

@@ -1,27 +0,0 @@
-app:
-  config:
-    id: 'open-source-app'
-    collection_name: 'open-source-app'
-    collect_metrics: false
-
-llm:
-  provider: gpt4all
-  model: 'orca-mini-3b.ggmlv3.q4_0.bin'
-  config:
-    temperature: 0.5
-    max_tokens: 1000
-    top_p: 1
-    stream: false
-
-vectordb:
-  provider: chroma
-  config:
-    collection_name: 'open-source-app'
-    dir: db
-    allow_reset: true
-
-embedder:
-  provider: gpt4all
-  config:
-    model: 'all-MiniLM-L6-v2'
-    deployment_name: null
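
Usage note: these bundled YAML presets were deleted; if you keep an equivalent file in your own project, a hedged sketch of loading it (whether App exposes a from_config constructor, and its argument name, are assumptions about the library API at this commit):

from embedchain import App

# Assumed API; reads a YAML file shaped like the presets above.
app = App.from_config(yaml_path="my-app.yaml")  # hypothetical local file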