docs: update docstrings (#565)

This commit is contained in:
cachho
2023-09-07 02:04:44 +02:00
committed by GitHub
parent 4754372fcd
commit 1ac8aef4de
25 changed files with 736 additions and 298 deletions

View File

@@ -42,5 +42,13 @@ class AddConfig(BaseConfig):
chunker: Optional[ChunkerConfig] = None,
loader: Optional[LoaderConfig] = None,
):
"""
Initializes a configuration class instance for the `add` method.
:param chunker: Chunker config, defaults to None
:type chunker: Optional[ChunkerConfig], optional
:param loader: Loader config, defaults to None
:type loader: Optional[LoaderConfig], optional
"""
self.loader = loader
self.chunker = chunker

View File

@@ -1,3 +1,5 @@
from typing import Any, Dict
from embedchain.helper_classes.json_serializable import JSONSerializable
@@ -7,7 +9,13 @@ class BaseConfig(JSONSerializable):
"""
def __init__(self):
"""Initializes a base configuration class instance."""
pass
def as_dict(self):
def as_dict(self) -> Dict[str, Any]:
"""Return config object as a dict
:return: config object as dict
:rtype: Dict[str, Any]
"""
return vars(self)

View File

@@ -13,15 +13,23 @@ class AppConfig(BaseAppConfig):
def __init__(
self,
log_level=None,
id=None,
log_level: str = "WARNING",
id: Optional[str] = None,
collect_metrics: Optional[bool] = None,
collection_name: Optional[str] = None,
):
"""
:param log_level: Optional. (String) Debug level
['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'].
:param id: Optional. ID of the app. Document metadata will have this id.
:param collect_metrics: Defaults to True. Send anonymous telemetry to improve embedchain.
Initializes a configuration class instance for an App. This is the simplest form of an embedchain app.
Most of the configuration is done in the `App` class itself.
:param log_level: Debug level ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], defaults to "WARNING"
:type log_level: str, optional
:param id: ID of the app. Document metadata will have this id, defaults to None
:type id: Optional[str], optional
:param collect_metrics: Send anonymous telemetry to improve embedchain, defaults to True
:type collect_metrics: Optional[bool], optional
:param collection_name: Default collection name. It's recommended to use app.db.set_collection_name() instead,
defaults to None
:type collection_name: Optional[str], optional
"""
super().__init__(log_level=log_level, id=id, collect_metrics=collect_metrics, collection_name=collection_name)

View File

@@ -13,23 +13,28 @@ class BaseAppConfig(BaseConfig, JSONSerializable):
def __init__(
self,
log_level=None,
log_level: str = "WARNING",
db: Optional[BaseVectorDB] = None,
id=None,
id: Optional[str] = None,
collect_metrics: bool = True,
collection_name: Optional[str] = None,
):
"""
:param log_level: Optional. (String) Debug level
['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'].
:param db: Optional. (Vector) database instance to use for embeddings. Deprecated in favor of app(..., db).
:param id: Optional. ID of the app. Document metadata will have this id.
:param collect_metrics: Defaults to True. Send anonymous telemetry to improve embedchain.
:param db_type: Optional. Initializes a default vector database of the given type.
Using the `db` argument is preferred.
:param es_config: Optional. elasticsearch database config to be used for connection
:param collection_name: Optional. Default collection name.
It's recommended to use app.set_collection_name() instead.
Initializes a configuration class instance for an App.
Most of the configuration is done in the `App` class itself.
:param log_level: Debug level ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], defaults to "WARNING"
:type log_level: str, optional
:param db: A database class. It is recommended to set this directly in the `App` class, not this config,
defaults to None
:type db: Optional[BaseVectorDB], optional
:param id: ID of the app. Document metadata will have this id, defaults to None
:type id: Optional[str], optional
:param collect_metrics: Send anonymous telemetry to improve embedchain, defaults to True
:type collect_metrics: Optional[bool], optional
:param collection_name: Default collection name. It's recommended to use app.db.set_collection_name() instead,
defaults to None
:type collection_name: Optional[str], optional
"""
self._setup_logging(log_level)
self.id = id

View File

@@ -3,6 +3,7 @@ from typing import Optional
from dotenv import load_dotenv
from embedchain.helper_classes.json_serializable import register_deserializable
from embedchain.vectordb.base_vector_db import BaseVectorDB
from .BaseAppConfig import BaseAppConfig
@@ -17,24 +18,29 @@ class CustomAppConfig(BaseAppConfig):
def __init__(
self,
log_level=None,
db=None,
id=None,
log_level: str = "WARNING",
db: Optional[BaseVectorDB] = None,
id: Optional[str] = None,
collect_metrics: Optional[bool] = None,
collection_name: Optional[str] = None,
):
"""
:param log_level: Optional. (String) Debug level
['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'].
:param db: Optional. (Vector) database to use for embeddings.
:param id: Optional. ID of the app. Document metadata will have this id.
:param provider: Optional. (Providers): LLM Provider to use.
:param open_source_app_config: Optional. Config instance needed for open source apps.
:param collect_metrics: Defaults to True. Send anonymous telemetry to improve embedchain.
:param collection_name: Optional. Default collection name.
It's recommended to use app.set_collection_name() instead.
"""
Initializes a configuration class instance for a Custom App.
Most of the configuration is done in the `CustomApp` class itself.
:param log_level: Debug level ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], defaults to "WARNING"
:type log_level: str, optional
:param db: A database class. It is recommended to set this directly in the `CustomApp` class, not this config,
defaults to None
:type db: Optional[BaseVectorDB], optional
:param id: ID of the app. Document metadata will have this id, defaults to None
:type id: Optional[str], optional
:param collect_metrics: Send anonymous telemetry to improve embedchain, defaults to True
:type collect_metrics: Optional[bool], optional
:param collection_name: Default collection name. It's recommended to use app.db.set_collection_name() instead,
defaults to None
:type collection_name: Optional[str], optional
"""
super().__init__(
log_level=log_level, db=db, id=id, collect_metrics=collect_metrics, collection_name=collection_name
)

View File

@@ -13,21 +13,27 @@ class OpenSourceAppConfig(BaseAppConfig):
def __init__(
self,
log_level=None,
id=None,
log_level: str = "WARNING",
id: Optional[str] = None,
collect_metrics: Optional[bool] = None,
model=None,
model: str = "orca-mini-3b.ggmlv3.q4_0.bin",
collection_name: Optional[str] = None,
):
"""
:param log_level: Optional. (String) Debug level
['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'].
:param id: Optional. ID of the app. Document metadata will have this id.
:param collect_metrics: Defaults to True. Send anonymous telemetry to improve embedchain.
:param model: Optional. GPT4ALL uses the model to instantiate the class.
So unlike `App`, it has to be provided before querying.
:param collection_name: Optional. Default collection name.
It's recommended to use app.db.set_collection_name() instead.
Initializes a configuration class instance for an Open Source App.
:param log_level: Debug level ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], defaults to "WARNING"
:type log_level: str, optional
:param id: ID of the app. Document metadata will have this id, defaults to None
:type id: Optional[str], optional
:param collect_metrics: Send anonymous telemetry to improve embedchain, defaults to True
:type collect_metrics: Optional[bool], optional
:param model: GPT4ALL uses the model to instantiate the class.
Unlike `App`, it has to be provided before querying, defaults to "orca-mini-3b.ggmlv3.q4_0.bin"
:type model: str, optional
:param collection_name: Default collection name. It's recommended to use app.db.set_collection_name() instead,
defaults to None
:type collection_name: Optional[str], optional
"""
self.model = model or "orca-mini-3b.ggmlv3.q4_0.bin"

View File

@@ -6,5 +6,13 @@ from embedchain.helper_classes.json_serializable import register_deserializable
@register_deserializable
class BaseEmbedderConfig:
def __init__(self, model: Optional[str] = None, deployment_name: Optional[str] = None):
"""
Initialize a new instance of an embedder config class.
:param model: model name of the llm embedding model (not applicable to all providers), defaults to None
:type model: Optional[str], optional
:param deployment_name: deployment name for llm embedding model, defaults to None
:type deployment_name: Optional[str], optional
"""
self.model = model
self.deployment_name = deployment_name

View File

@@ -1,6 +1,6 @@
import re
from string import Template
from typing import Optional
from typing import Any, Dict, Optional
from embedchain.config.BaseConfig import BaseConfig
from embedchain.helper_classes.json_serializable import register_deserializable
@@ -57,51 +57,59 @@ class BaseLlmConfig(BaseConfig):
def __init__(
self,
number_documents=None,
template: Template = None,
model=None,
temperature=None,
max_tokens=None,
top_p=None,
number_documents: int = 1,
template: Optional[Template] = None,
model: Optional[str] = None,
temperature: float = 0,
max_tokens: int = 1000,
top_p: float = 1,
stream: bool = False,
deployment_name=None,
deployment_name: Optional[str] = None,
system_prompt: Optional[str] = None,
where=None,
where: Dict[str, Any] = None,
):
"""
Initializes the QueryConfig instance.
Initializes a configuration class instance for the LLM.
:param number_documents: Number of documents to pull from the database as
context.
:param template: Optional. The `Template` instance to use as a template for
prompt.
:param model: Optional. Controls the OpenAI model used.
:param temperature: Optional. Controls the randomness of the model's output.
Higher values (closer to 1) make output more random, lower values make it more
deterministic.
:param max_tokens: Optional. Controls how many tokens are generated.
:param top_p: Optional. Controls the diversity of words. Higher values
(closer to 1) make word selection more diverse, lower values make words less
diverse.
:param stream: Optional. Control if response is streamed back to user
:param deployment_name: t.b.a.
:param system_prompt: Optional. System prompt string.
:param where: Optional. A dictionary of key-value pairs to filter the database results.
Takes the place of the former `QueryConfig` or `ChatConfig`.
Use `LlmConfig` as an alias to `BaseLlmConfig`.
:param number_documents: Number of documents to pull from the database as
context, defaults to 1
:type number_documents: int, optional
:param template: The `Template` instance to use as a template for
prompt, defaults to None
:type template: Optional[Template], optional
:param model: Controls the OpenAI model used, defaults to None
:type model: Optional[str], optional
:param temperature: Controls the randomness of the model's output.
Higher values (closer to 1) make output more random, lower values make it more deterministic, defaults to 0
:type temperature: float, optional
:param max_tokens: Controls how many tokens are generated, defaults to 1000
:type max_tokens: int, optional
:param top_p: Controls the diversity of words. Higher values (closer to 1) make word selection more diverse,
defaults to 1
:type top_p: float, optional
:param stream: Control if response is streamed back to user, defaults to False
:type stream: bool, optional
:param deployment_name: t.b.a., defaults to None
:type deployment_name: Optional[str], optional
:param system_prompt: System prompt string, defaults to None
:type system_prompt: Optional[str], optional
:param where: A dictionary of key-value pairs to filter the database results, defaults to None
:type where: Dict[str, Any], optional
:raises ValueError: If the template is not valid as template should
contain $context and $query (and optionally $history).
contain $context and $query (and optionally $history)
:raises ValueError: Stream is not boolean
"""
if number_documents is None:
self.number_documents = 1
else:
self.number_documents = number_documents
if template is None:
template = DEFAULT_PROMPT_TEMPLATE
self.temperature = temperature if temperature else 0
self.max_tokens = max_tokens if max_tokens else 1000
self.number_documents = number_documents
self.temperature = temperature
self.max_tokens = max_tokens
self.model = model
self.top_p = top_p if top_p else 1
self.top_p = top_p
self.deployment_name = deployment_name
self.system_prompt = system_prompt
@@ -115,20 +123,24 @@ class BaseLlmConfig(BaseConfig):
self.stream = stream
self.where = where
def validate_template(self, template: Template):
def validate_template(self, template: Template) -> bool:
"""
validate the template
:param template: the template to validate
:return: Boolean, valid (true) or invalid (false)
:type template: Template
:return: valid (true) or invalid (false)
:rtype: bool
"""
return re.search(query_re, template.template) and re.search(context_re, template.template)
def _validate_template_history(self, template: Template):
def _validate_template_history(self, template: Template) -> bool:
"""
validate the history template for history
validate the template with history
:param template: the template to validate
:return: Boolean, valid (true) or invalid (false)
:type template: Template
:return: valid (true) or invalid (false)
:rtype: bool
"""
return re.search(history_re, template.template)

View File

@@ -7,11 +7,23 @@ class BaseVectorDbConfig(BaseConfig):
def __init__(
self,
collection_name: Optional[str] = None,
dir: Optional[str] = None,
dir: str = "db",
host: Optional[str] = None,
port: Optional[str] = None,
):
"""
Initializes a configuration class instance for the vector database.
:param collection_name: Default name for the collection, defaults to None
:type collection_name: Optional[str], optional
:param dir: Path to the database directory, where the database is stored, defaults to "db"
:type dir: str, optional
:param host: Database connection remote host. Use this if you run Embedchain as a client, defaults to None
:type host: Optional[str], optional
:param port: Database connection remote port. Use this if you run Embedchain as a client, defaults to None
:type port: Optional[str], optional
"""
self.collection_name = collection_name or "embedchain_store"
self.dir = dir or "db"
self.dir = dir
self.host = host
self.port = port

View File

@@ -14,6 +14,20 @@ class ChromaDbConfig(BaseVectorDbConfig):
port: Optional[str] = None,
chroma_settings: Optional[dict] = None,
):
"""
Initializes a configuration class instance for ChromaDB.
:param collection_name: Default name for the collection, defaults to None
:type collection_name: Optional[str], optional
:param dir: Path to the database directory, where the database is stored, defaults to None
:type dir: Optional[str], optional
:param host: Database connection remote host. Use this if you run Embedchain as a client, defaults to None
:type host: Optional[str], optional
:param port: Database connection remote port. Use this if you run Embedchain as a client, defaults to None
:type port: Optional[str], optional
:param chroma_settings: Chroma settings dict, defaults to None
:type chroma_settings: Optional[dict], optional
"""
"""
:param chroma_settings: Optional. Chroma settings for connection.
"""

View File

@@ -14,9 +14,16 @@ class ElasticsearchDBConfig(BaseVectorDbConfig):
**ES_EXTRA_PARAMS: Dict[str, any],
):
"""
Config to initialize an elasticsearch client.
:param es_url. elasticsearch url or list of nodes url to be used for connection
Initializes a configuration class instance for an Elasticsearch client.
:param collection_name: Default name for the collection, defaults to None
:type collection_name: Optional[str], optional
:param dir: Path to the database directory, where the database is stored, defaults to None
:type dir: Optional[str], optional
:param es_url: Elasticsearch URL or list of node URLs to be used for the connection, defaults to None
:type es_url: Union[str, List[str]], optional
:param ES_EXTRA_PARAMS: extra params dict that can be passed to elasticsearch.
:type ES_EXTRA_PARAMS: Dict[str, Any], optional
"""
# self, es_url: Union[str, List[str]] = None, **ES_EXTRA_PARAMS: Dict[str, any]):
self.ES_URL = es_url