[docs]: Revamp embedchain docs (#799)

Author: Deshraj Yadav
Date: 2023-10-13 15:38:15 -07:00
Committed by: GitHub
Parent: a86d7f52e9
Commit: 4a8c50f886
68 changed files with 1175 additions and 673 deletions

View File

@@ -46,7 +46,7 @@ class App(EmbedChain):
         :param llm: LLM Class instance. example: `from embedchain.llm.openai import OpenAILlm`, defaults to OpenAiLlm
         :type llm: BaseLlm, optional
         :param llm_config: Allows you to configure the LLM, e.g. how many documents to return,
-        example: `from embedchain.config import LlmConfig`, defaults to None
+        example: `from embedchain.config import BaseLlmConfig`, defaults to None
         :type llm_config: Optional[BaseLlmConfig], optional
         :param db: The database to use for storing and retrieving embeddings,
         example: `from embedchain.vectordb.chroma_db import ChromaDb`, defaults to ChromaDb
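
Migration note: a minimal sketch of the renamed config class in use (the option values are illustrative; assumes an OpenAI key is set in the environment):

from embedchain import App
from embedchain.config import BaseLlmConfig

# BaseLlmConfig replaces the former LlmConfig alias; number_documents
# controls how many context documents are pulled from the vector DB.
llm_config = BaseLlmConfig(number_documents=3, temperature=0.5)
app = App(llm_config=llm_config)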

View File

@@ -46,7 +46,7 @@ class OpenSourceApp(App):
         that does not fall into the LLM, database or embedder category, defaults to None
         :type config: OpenSourceAppConfig, optional
         :param llm_config: Allows you to configure the LLM, e.g. how many documents to return.
-        example: `from embedchain.config import LlmConfig`, defaults to None
+        example: `from embedchain.config import BaseLlmConfig`, defaults to None
         :type llm_config: BaseLlmConfig, optional
         :param chromadb_config: Allows you to configure the open source database,
         example: `from embedchain.config import ChromaDbConfig`, defaults to None
@@ -54,7 +54,7 @@ class OpenSourceApp(App):
         :param system_prompt: System prompt that will be provided to the LLM as such.
         Please don't use for the time being, as it's not supported., defaults to None
         :type system_prompt: Optional[str], optional
-        :raises TypeError: `OpenSourceAppConfig` or `LlmConfig` invalid.
+        :raises TypeError: `OpenSourceAppConfig` or `BaseLlmConfig` invalid.
         """
         logging.warning(
             "DEPRECATION WARNING: Please use `App` instead of `OpenSourceApp`."

View File

@@ -4,8 +4,8 @@ from embedchain.apps.app import App
 from embedchain.apps.open_source_app import OpenSourceApp
 from embedchain.config import BaseLlmConfig
 from embedchain.config.apps.base_app_config import BaseAppConfig
-from embedchain.config.llm.base_llm_config import (DEFAULT_PROMPT,
-                                                   DEFAULT_PROMPT_WITH_HISTORY)
+from embedchain.config.llm.base import (DEFAULT_PROMPT,
+                                        DEFAULT_PROMPT_WITH_HISTORY)
 from embedchain.helper.json_serializable import register_deserializable

View File

@@ -1,7 +1,7 @@
 from typing import Any

 from embedchain import App
-from embedchain.config import AddConfig, AppConfig, LlmConfig
+from embedchain.config import AddConfig, AppConfig, BaseLlmConfig
 from embedchain.embedder.openai import OpenAIEmbedder
 from embedchain.helper.json_serializable import (JSONSerializable,
                                                  register_deserializable)
@@ -27,14 +27,14 @@ class BaseBot(JSONSerializable):
         config = config if config else AddConfig()
         self.app.add(data, config=config)

-    def query(self, query: str, config: LlmConfig = None) -> str:
+    def query(self, query: str, config: BaseLlmConfig = None) -> str:
         """
         Query the bot

         :param query: the user query
         :type query: str
         :param config: configuration class instance, defaults to None
-        :type config: LlmConfig, optional
+        :type config: BaseLlmConfig, optional
         :return: Answer
         :rtype: str
         """

View File

@@ -7,8 +7,7 @@ from .apps.open_source_app_config import OpenSourceAppConfig
 from .base_config import BaseConfig
 from .embedder.base import BaseEmbedderConfig
 from .embedder.base import BaseEmbedderConfig as EmbedderConfig
-from .llm.base_llm_config import BaseLlmConfig
-from .llm.base_llm_config import BaseLlmConfig as LlmConfig
+from .llm.base import BaseLlmConfig
 from .vectordb.chroma import ChromaDbConfig
 from .vectordb.elasticsearch import ElasticsearchDBConfig
 from .vectordb.opensearch import OpenSearchDBConfig
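
Migration note: the LlmConfig alias is gone from the package root, so imports need updating:

# Before (no longer exported):
#   from embedchain.config import LlmConfig
# After:
from embedchain.config import BaseLlmConfig

config = BaseLlmConfig()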

View File

@@ -73,7 +73,6 @@ class BaseLlmConfig(BaseConfig):
         Initializes a configuration class instance for the LLM.

         Takes the place of the former `QueryConfig` or `ChatConfig`.
-        Use `LlmConfig` as an alias to `BaseLlmConfig`.

         :param number_documents: Number of documents to pull from the database as
         context, defaults to 1
@@ -115,6 +114,9 @@ class BaseLlmConfig(BaseConfig):
         self.system_prompt = system_prompt
         self.query_type = query_type

+        if type(template) is str:
+            template = Template(template)
+
         if self.validate_template(template):
             self.template = template
         else:
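
Usage note: the new branch coerces a plain string into a string.Template, so callers no longer wrap it themselves. A sketch (the $context/$query placeholders follow the template syntax the validator expects; the wording is illustrative):

from embedchain.config import BaseLlmConfig

# A plain string now works; previously this required Template("...").
config = BaseLlmConfig(
    template="Use the context to answer.\n$context\nQuery: $query\nAnswer:"
)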

View File

@@ -470,7 +470,7 @@ class EmbedChain(JSONSerializable):
         :param input_query: The query to use.
         :type input_query: str
-        :param config: The `LlmConfig` instance to use as configuration options. This is used for one method call.
+        :param config: The `BaseLlmConfig` instance to use as configuration options. This is used for one method call.
         To persistently use a config, declare it during app init., defaults to None
         :type config: Optional[BaseLlmConfig], optional
         :param dry_run: A dry run does everything except send the resulting prompt to
@@ -506,7 +506,7 @@ class EmbedChain(JSONSerializable):
         :param input_query: The query to use.
         :type input_query: str
-        :param config: The `LlmConfig` instance to use as configuration options. This is used for one method call.
+        :param config: The `BaseLlmConfig` instance to use as configuration options. This is used for one method call.
         To persistently use a config, declare it during app init., defaults to None
         :type config: Optional[BaseLlmConfig], optional
         :param dry_run: A dry run does everything except send the resulting prompt to
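
Usage note: a sketch of the per-call config override and of dry_run, both documented above (the data source is hypothetical):

from embedchain import App
from embedchain.config import BaseLlmConfig

app = App()
app.add("https://example.com/handbook")  # hypothetical source

# One-off override; the app-level config is untouched afterwards.
answer = app.query("Summarize the handbook.",
                   config=BaseLlmConfig(number_documents=5))

# dry_run builds the final prompt without sending it to the LLM.
prompt = app.query("Summarize the handbook.", dry_run=True)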

View File

@@ -7,7 +7,7 @@ from embedchain.embedder.base import BaseEmbedder
 from embedchain.models import VectorDimensions


-class VertexAiEmbedder(BaseEmbedder):
+class VertexAIEmbedder(BaseEmbedder):
     def __init__(self, config: Optional[BaseEmbedderConfig] = None):
         super().__init__(config=config)

View File

@@ -13,16 +13,16 @@ class LlmFactory:
         "azure_openai": "embedchain.llm.azure_openai.AzureOpenAILlm",
         "cohere": "embedchain.llm.cohere.CohereLlm",
         "gpt4all": "embedchain.llm.gpt4all.GPT4ALLLlm",
-        "hugging_face_llm": "embedchain.llm.hugging_face_llm.HuggingFaceLlm",
+        "huggingface": "embedchain.llm.huggingface.HuggingFaceLlm",
         "jina": "embedchain.llm.jina.JinaLlm",
         "llama2": "embedchain.llm.llama2.Llama2Llm",
         "openai": "embedchain.llm.openai.OpenAILlm",
         "vertexai": "embedchain.llm.vertex_ai.VertexAILlm",
     }
     provider_to_config_class = {
-        "embedchain": "embedchain.config.llm.base_llm_config.BaseLlmConfig",
-        "openai": "embedchain.config.llm.base_llm_config.BaseLlmConfig",
-        "anthropic": "embedchain.config.llm.base_llm_config.BaseLlmConfig",
+        "embedchain": "embedchain.config.llm.base.BaseLlmConfig",
+        "openai": "embedchain.config.llm.base.BaseLlmConfig",
+        "anthropic": "embedchain.config.llm.base.BaseLlmConfig",
     }

     @classmethod
@@ -43,7 +43,7 @@ class EmbedderFactory:
     provider_to_class = {
         "gpt4all": "embedchain.embedder.gpt4all.GPT4AllEmbedder",
         "huggingface": "embedchain.embedder.huggingface.HuggingFaceEmbedder",
-        "vertexai": "embedchain.embedder.vertexai.VertexAiEmbedder",
+        "vertexai": "embedchain.embedder.vertexai.VertexAIEmbedder",
         "openai": "embedchain.embedder.openai.OpenAIEmbedder",
     }
     provider_to_config_class = {
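
Background note: the factories map a provider key to a dotted class path. A generic sketch of how such a path resolves (illustrative helper, not the library's actual factory method):

import importlib


def load_class(dotted_path: str):
    # "pkg.module.ClassName" -> import pkg.module, return ClassName.
    module_path, class_name = dotted_path.rsplit(".", 1)
    return getattr(importlib.import_module(module_path), class_name)


# e.g. the renamed provider entry above:
HuggingFaceLlm = load_class("embedchain.llm.huggingface.HuggingFaceLlm")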

View File

@@ -5,9 +5,9 @@ from langchain.memory import ConversationBufferMemory
 from langchain.schema import BaseMessage

 from embedchain.config import BaseLlmConfig
-from embedchain.config.llm.base_llm_config import (
-    DEFAULT_PROMPT, DEFAULT_PROMPT_WITH_HISTORY_TEMPLATE,
-    DOCS_SITE_PROMPT_TEMPLATE)
+from embedchain.config.llm.base import (DEFAULT_PROMPT,
+                                        DEFAULT_PROMPT_WITH_HISTORY_TEMPLATE,
+                                        DOCS_SITE_PROMPT_TEMPLATE)
 from embedchain.helper.json_serializable import JSONSerializable
@@ -174,7 +174,7 @@ class BaseLlm(JSONSerializable):
         :type input_query: str
         :param contexts: Embeddings retrieved from the database to be used as context.
         :type contexts: List[str]
-        :param config: The `LlmConfig` instance to use as configuration options. This is used for one method call.
+        :param config: The `BaseLlmConfig` instance to use as configuration options. This is used for one method call.
         To persistently use a config, declare it during app init., defaults to None
         :type config: Optional[BaseLlmConfig], optional
         :param dry_run: A dry run does everything except send the resulting prompt to
@@ -230,7 +230,7 @@ class BaseLlm(JSONSerializable):
         :type input_query: str
         :param contexts: Embeddings retrieved from the database to be used as context.
         :type contexts: List[str]
-        :param config: The `LlmConfig` instance to use as configuration options. This is used for one method call.
+        :param config: The `BaseLlmConfig` instance to use as configuration options. This is used for one method call.
         To persistently use a config, declare it during app init., defaults to None
         :type config: Optional[BaseLlmConfig], optional
         :param dry_run: A dry run does everything except send the resulting prompt to
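
Background note: DEFAULT_PROMPT is a plain string with string.Template placeholders. A sketch of rendering a prompt from retrieved contexts (the $context/$query placeholder names are assumed from the default prompt; the joining scheme is illustrative):

from string import Template

from embedchain.config.llm.base import DEFAULT_PROMPT

prompt = Template(DEFAULT_PROMPT).substitute(
    context="\n".join(["first retrieved chunk", "second retrieved chunk"]),
    query="What does the first chunk say?",
)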

View File

@@ -30,11 +30,11 @@ class GPT4ALLLlm(BaseLlm):
     def _get_answer(self, prompt: str, config: BaseLlmConfig) -> Union[str, Iterable]:
         if config.model and config.model != self.config.model:
             raise RuntimeError(
-                "OpenSourceApp does not support switching models at runtime. Please create a new app instance."
+                "GPT4ALLLlm does not support switching models at runtime. Please create a new app instance."
             )
         if config.system_prompt:
-            raise ValueError("OpenSourceApp does not support `system_prompt`")
+            raise ValueError("GPT4ALLLlm does not support `system_prompt`")

         response = self.instance.generate(
             prompt=prompt,
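
Usage note: the corrected messages name the class that actually enforces the constraint: the model is fixed at construction time. A sketch (the model name mirrors the open-source preset below):

from embedchain.config import BaseLlmConfig
from embedchain.llm.gpt4all import GPT4ALLLlm

llm = GPT4ALLLlm(config=BaseLlmConfig(model="orca-mini-3b.ggmlv3.q4_0.bin"))
# Passing a config with a different model later raises RuntimeError;
# construct a new GPT4ALLLlm (or app) instead.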

View File

@@ -10,10 +10,10 @@ from embedchain.llm.base import BaseLlm

 @register_deserializable
-class HuggingFaceHubLlm(BaseLlm):
+class HuggingFaceLlm(BaseLlm):
     def __init__(self, config: Optional[BaseLlmConfig] = None):
-        if "HUGGINGFACEHUB_ACCESS_TOKEN" not in os.environ:
-            raise ValueError("Please set the HUGGINGFACEHUB_ACCESS_TOKEN environment variable.")
+        if "HUGGINGFACE_ACCESS_TOKEN" not in os.environ:
+            raise ValueError("Please set the HUGGINGFACE_ACCESS_TOKEN environment variable.")

         try:
             importlib.import_module("huggingface_hub")
@@ -27,8 +27,8 @@ class HuggingFaceHubLlm(BaseLlm):
     def get_llm_model_answer(self, prompt):
         if self.config.system_prompt:
-            raise ValueError("HuggingFaceHubLlm does not support `system_prompt`")
-        return HuggingFaceHubLlm._get_answer(prompt=prompt, config=self.config)
+            raise ValueError("HuggingFaceLlm does not support `system_prompt`")
+        return HuggingFaceLlm._get_answer(prompt=prompt, config=self.config)

     @staticmethod
     def _get_answer(prompt: str, config: BaseLlmConfig) -> str:
@@ -43,7 +43,7 @@ class HuggingFaceHubLlm(BaseLlm):
             raise ValueError("`top_p` must be > 0.0 and < 1.0")

         llm = HuggingFaceHub(
-            huggingfacehub_api_token=os.environ["HUGGINGFACEHUB_ACCESS_TOKEN"],
+            huggingfacehub_api_token=os.environ["HUGGINGFACE_ACCESS_TOKEN"],
             repo_id=config.model or "google/flan-t5-xxl",
             model_kwargs=model_kwargs,
         )
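
Migration note: the environment variable was renamed, so existing setups need the new name (the token value is a placeholder):

import os

from embedchain.llm.huggingface import HuggingFaceLlm

# Renamed: HUGGINGFACEHUB_ACCESS_TOKEN -> HUGGINGFACE_ACCESS_TOKEN.
os.environ["HUGGINGFACE_ACCESS_TOKEN"] = "hf_..."  # placeholder token
llm = HuggingFaceLlm()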

View File

@@ -7,12 +7,12 @@ from embedchain.llm.base import BaseLlm

 @register_deserializable
-class VertexAiLlm(BaseLlm):
+class VertexAILlm(BaseLlm):
     def __init__(self, config: Optional[BaseLlmConfig] = None):
         super().__init__(config=config)

     def get_llm_model_answer(self, prompt):
-        return VertexAiLlm._get_answer(prompt=prompt, config=self.config)
+        return VertexAILlm._get_answer(prompt=prompt, config=self.config)

     @staticmethod
     def _get_answer(prompt: str, config: BaseLlmConfig) -> str:
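
Migration note: both Vertex AI classes now use the "AI" capitalization; import paths per the factory mappings above:

from embedchain.embedder.vertexai import VertexAIEmbedder
from embedchain.llm.vertex_ai import VertexAILlm

llm = VertexAILlm()            # was VertexAiLlm
embedder = VertexAIEmbedder()  # was VertexAiEmbedder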

View File

@@ -1,26 +0,0 @@
-app:
-  config:
-    id: 'my-app'
-    collection_name: 'my-app'
-
-llm:
-  provider: openai
-  model: 'gpt-3.5-turbo'
-  config:
-    temperature: 0.5
-    max_tokens: 1000
-    top_p: 1
-    stream: false
-
-vectordb:
-  provider: chroma
-  config:
-    collection_name: 'my-app'
-    dir: db
-    allow_reset: true
-
-embedder:
-  provider: openai
-  config:
-    model: 'text-embedding-ada-002'
-    deployment_name: null

View File

@@ -1,33 +0,0 @@
-app:
-  config:
-    id: 'my-app'
-    log_level: 'WARN'
-    collect_metrics: true
-    collection_name: 'my-app'
-
-llm:
-  provider: openai
-  model: 'gpt-3.5-turbo'
-  config:
-    temperature: 0.5
-    max_tokens: 1000
-    top_p: 1
-    stream: false
-
-vectordb:
-  provider: opensearch
-  config:
-    opensearch_url: 'https://localhost:9200'
-    http_auth:
-      - admin
-      - admin
-    vector_dimension: 1536
-    collection_name: 'my-app'
-    use_ssl: false
-    verify_certs: false
-
-embedder:
-  provider: openai
-  config:
-    model: 'text-embedding-ada-002'
-    deployment_name: null

View File

@@ -1,27 +0,0 @@
-app:
-  config:
-    id: 'open-source-app'
-    collection_name: 'open-source-app'
-    collect_metrics: false
-
-llm:
-  provider: gpt4all
-  model: 'orca-mini-3b.ggmlv3.q4_0.bin'
-  config:
-    temperature: 0.5
-    max_tokens: 1000
-    top_p: 1
-    stream: false
-
-vectordb:
-  provider: chroma
-  config:
-    collection_name: 'open-source-app'
-    dir: db
-    allow_reset: true
-
-embedder:
-  provider: gpt4all
-  config:
-    model: 'all-MiniLM-L6-v2'
-    deployment_name: null
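
Usage note: these bundled YAML presets were deleted; if you keep an equivalent file in your own project, a hedged sketch of loading it (whether App exposes a from_config constructor, and its argument name, are assumptions about the library API at this commit):

from embedchain import App

# Assumed API; reads a YAML file shaped like the presets above.
app = App.from_config(yaml_path="my-app.yaml")  # hypothetical local file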