docs: update docstrings (#565)
This commit is contained in:
@@ -42,5 +42,13 @@ class AddConfig(BaseConfig):
|
||||
chunker: Optional[ChunkerConfig] = None,
|
||||
loader: Optional[LoaderConfig] = None,
|
||||
):
|
||||
"""
|
||||
Initializes a configuration class instance for the `add` method.
|
||||
|
||||
:param chunker: Chunker config, defaults to None
|
||||
:type chunker: Optional[ChunkerConfig], optional
|
||||
:param loader: Loader config, defaults to None
|
||||
:type loader: Optional[LoaderConfig], optional
|
||||
"""
|
||||
self.loader = loader
|
||||
self.chunker = chunker
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
from typing import Any, Dict
|
||||
|
||||
from embedchain.helper_classes.json_serializable import JSONSerializable
|
||||
|
||||
|
||||
@@ -7,7 +9,13 @@ class BaseConfig(JSONSerializable):
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""Initializes a configuration class instance."""
|
||||
pass
|
||||
|
||||
def as_dict(self):
|
||||
def as_dict(self) -> Dict[str, Any]:
|
||||
"""Return config object as a dict
|
||||
|
||||
:return: config object as dict
|
||||
:rtype: Dict[str, Any]
|
||||
"""
|
||||
return vars(self)
|
||||
|
||||
@@ -13,15 +13,23 @@ class AppConfig(BaseAppConfig):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
log_level=None,
|
||||
id=None,
|
||||
log_level: str = "WARNING",
|
||||
id: Optional[str] = None,
|
||||
collect_metrics: Optional[bool] = None,
|
||||
collection_name: Optional[str] = None,
|
||||
):
|
||||
"""
|
||||
:param log_level: Optional. (String) Debug level
|
||||
['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'].
|
||||
:param id: Optional. ID of the app. Document metadata will have this id.
|
||||
:param collect_metrics: Defaults to True. Send anonymous telemetry to improve embedchain.
|
||||
Initializes a configuration class instance for an App. This is the simplest form of an embedchain app.
|
||||
Most of the configuration is done in the `App` class itself.
|
||||
|
||||
:param log_level: Debug level ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], defaults to "WARNING"
|
||||
:type log_level: str, optional
|
||||
:param id: ID of the app. Document metadata will have this id, defaults to None
|
||||
:type id: Optional[str], optional
|
||||
:param collect_metrics: Send anonymous telemetry to improve embedchain, defaults to True
|
||||
:type collect_metrics: Optional[bool], optional
|
||||
:param collection_name: Default collection name. It's recommended to use app.db.set_collection_name() instead,
|
||||
defaults to None
|
||||
:type collection_name: Optional[str], optional
|
||||
"""
|
||||
super().__init__(log_level=log_level, id=id, collect_metrics=collect_metrics, collection_name=collection_name)
|
||||
|
||||
@@ -13,23 +13,28 @@ class BaseAppConfig(BaseConfig, JSONSerializable):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
log_level=None,
|
||||
log_level: str = "WARNING",
|
||||
db: Optional[BaseVectorDB] = None,
|
||||
id=None,
|
||||
id: Optional[str] = None,
|
||||
collect_metrics: bool = True,
|
||||
collection_name: Optional[str] = None,
|
||||
):
|
||||
"""
|
||||
:param log_level: Optional. (String) Debug level
|
||||
['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'].
|
||||
:param db: Optional. (Vector) database instance to use for embeddings. Deprecated in favor of app(..., db).
|
||||
:param id: Optional. ID of the app. Document metadata will have this id.
|
||||
:param collect_metrics: Defaults to True. Send anonymous telemetry to improve embedchain.
|
||||
:param db_type: Optional. Initializes a default vector database of the given type.
|
||||
Using the `db` argument is preferred.
|
||||
:param es_config: Optional. elasticsearch database config to be used for connection
|
||||
:param collection_name: Optional. Default collection name.
|
||||
It's recommended to use app.set_collection_name() instead.
|
||||
Initializes a configuration class instance for an App.
|
||||
Most of the configuration is done in the `App` class itself.
|
||||
|
||||
:param log_level: Debug level ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], defaults to "WARNING"
|
||||
:type log_level: str, optional
|
||||
:param db: A database class. It is recommended to set this directly in the `App` class, not this config,
|
||||
defaults to None
|
||||
:type db: Optional[BaseVectorDB], optional
|
||||
:param id: ID of the app. Document metadata will have this id, defaults to None
|
||||
:type id: Optional[str], optional
|
||||
:param collect_metrics: Send anonymous telemetry to improve embedchain, defaults to True
|
||||
:type collect_metrics: Optional[bool], optional
|
||||
:param collection_name: Default collection name. It's recommended to use app.db.set_collection_name() instead,
|
||||
defaults to None
|
||||
:type collection_name: Optional[str], optional
|
||||
"""
|
||||
self._setup_logging(log_level)
|
||||
self.id = id
|
||||
|
||||
@@ -3,6 +3,7 @@ from typing import Optional
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from embedchain.helper_classes.json_serializable import register_deserializable
|
||||
from embedchain.vectordb.base_vector_db import BaseVectorDB
|
||||
|
||||
from .BaseAppConfig import BaseAppConfig
|
||||
|
||||
@@ -17,24 +18,29 @@ class CustomAppConfig(BaseAppConfig):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
log_level=None,
|
||||
db=None,
|
||||
id=None,
|
||||
log_level: str = "WARNING",
|
||||
db: Optional[BaseVectorDB] = None,
|
||||
id: Optional[str] = None,
|
||||
collect_metrics: Optional[bool] = None,
|
||||
collection_name: Optional[str] = None,
|
||||
):
|
||||
"""
|
||||
:param log_level: Optional. (String) Debug level
|
||||
['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'].
|
||||
:param db: Optional. (Vector) database to use for embeddings.
|
||||
:param id: Optional. ID of the app. Document metadata will have this id.
|
||||
:param provider: Optional. (Providers): LLM Provider to use.
|
||||
:param open_source_app_config: Optional. Config instance needed for open source apps.
|
||||
:param collect_metrics: Defaults to True. Send anonymous telemetry to improve embedchain.
|
||||
:param collection_name: Optional. Default collection name.
|
||||
It's recommended to use app.set_collection_name() instead.
|
||||
"""
|
||||
Initializes a configuration class instance for a Custom App.
|
||||
Most of the configuration is done in the `CustomApp` class itself.
|
||||
|
||||
:param log_level: Debug level ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], defaults to "WARNING"
|
||||
:type log_level: str, optional
|
||||
:param db: A database class. It is recommended to set this directly in the `CustomApp` class, not this config,
|
||||
defaults to None
|
||||
:type db: Optional[BaseVectorDB], optional
|
||||
:param id: ID of the app. Document metadata will have this id, defaults to None
|
||||
:type id: Optional[str], optional
|
||||
:param collect_metrics: Send anonymous telemetry to improve embedchain, defaults to True
|
||||
:type collect_metrics: Optional[bool], optional
|
||||
:param collection_name: Default collection name. It's recommended to use app.db.set_collection_name() instead,
|
||||
defaults to None
|
||||
:type collection_name: Optional[str], optional
|
||||
"""
|
||||
super().__init__(
|
||||
log_level=log_level, db=db, id=id, collect_metrics=collect_metrics, collection_name=collection_name
|
||||
)
|
||||
|
||||
@@ -13,21 +13,27 @@ class OpenSourceAppConfig(BaseAppConfig):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
log_level=None,
|
||||
id=None,
|
||||
log_level: str = "WARNING",
|
||||
id: Optional[str] = None,
|
||||
collect_metrics: Optional[bool] = None,
|
||||
model=None,
|
||||
model: str = "orca-mini-3b.ggmlv3.q4_0.bin",
|
||||
collection_name: Optional[str] = None,
|
||||
):
|
||||
"""
|
||||
:param log_level: Optional. (String) Debug level
|
||||
['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'].
|
||||
:param id: Optional. ID of the app. Document metadata will have this id.
|
||||
:param collect_metrics: Defaults to True. Send anonymous telemetry to improve embedchain.
|
||||
:param model: Optional. GPT4ALL uses the model to instantiate the class.
|
||||
So unlike `App`, it has to be provided before querying.
|
||||
:param collection_name: Optional. Default collection name.
|
||||
It's recommended to use app.db.set_collection_name() instead.
|
||||
Initializes a configuration class instance for an Open Source App.
|
||||
|
||||
:param log_level: Debug level ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], defaults to "WARNING"
|
||||
:type log_level: str, optional
|
||||
:param id: ID of the app. Document metadata will have this id, defaults to None
|
||||
:type id: Optional[str], optional
|
||||
:param collect_metrics: Send anonymous telemetry to improve embedchain, defaults to True
|
||||
:type collect_metrics: Optional[bool], optional
|
||||
:param model: GPT4ALL uses the model to instantiate the class.
|
||||
Unlike `App`, it has to be provided before querying, defaults to "orca-mini-3b.ggmlv3.q4_0.bin"
|
||||
:type model: str, optional
|
||||
:param collection_name: Default collection name. It's recommended to use app.db.set_collection_name() instead,
|
||||
defaults to None
|
||||
:type collection_name: Optional[str], optional
|
||||
"""
|
||||
self.model = model or "orca-mini-3b.ggmlv3.q4_0.bin"
|
||||
|
||||
|
||||
@@ -6,5 +6,13 @@ from embedchain.helper_classes.json_serializable import register_deserializable
|
||||
@register_deserializable
|
||||
class BaseEmbedderConfig:
|
||||
def __init__(self, model: Optional[str] = None, deployment_name: Optional[str] = None):
|
||||
"""
|
||||
Initialize a new instance of an embedder config class.
|
||||
|
||||
:param model: model name of the llm embedding model (not applicable to all providers), defaults to None
|
||||
:type model: Optional[str], optional
|
||||
:param deployment_name: deployment name for llm embedding model, defaults to None
|
||||
:type deployment_name: Optional[str], optional
|
||||
"""
|
||||
self.model = model
|
||||
self.deployment_name = deployment_name
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import re
|
||||
from string import Template
|
||||
from typing import Optional
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from embedchain.config.BaseConfig import BaseConfig
|
||||
from embedchain.helper_classes.json_serializable import register_deserializable
|
||||
@@ -57,51 +57,59 @@ class BaseLlmConfig(BaseConfig):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
number_documents=None,
|
||||
template: Template = None,
|
||||
model=None,
|
||||
temperature=None,
|
||||
max_tokens=None,
|
||||
top_p=None,
|
||||
number_documents: int = 1,
|
||||
template: Optional[Template] = None,
|
||||
model: Optional[str] = None,
|
||||
temperature: float = 0,
|
||||
max_tokens: int = 1000,
|
||||
top_p: float = 1,
|
||||
stream: bool = False,
|
||||
deployment_name=None,
|
||||
deployment_name: Optional[str] = None,
|
||||
system_prompt: Optional[str] = None,
|
||||
where=None,
|
||||
where: Dict[str, Any] = None,
|
||||
):
|
||||
"""
|
||||
Initializes the QueryConfig instance.
|
||||
Initializes a configuration class instance for the LLM.
|
||||
|
||||
:param number_documents: Number of documents to pull from the database as
|
||||
context.
|
||||
:param template: Optional. The `Template` instance to use as a template for
|
||||
prompt.
|
||||
:param model: Optional. Controls the OpenAI model used.
|
||||
:param temperature: Optional. Controls the randomness of the model's output.
|
||||
Higher values (closer to 1) make output more random, lower values make it more
|
||||
deterministic.
|
||||
:param max_tokens: Optional. Controls how many tokens are generated.
|
||||
:param top_p: Optional. Controls the diversity of words. Higher values
|
||||
(closer to 1) make word selection more diverse, lower values make words less
|
||||
diverse.
|
||||
:param stream: Optional. Control if response is streamed back to user
|
||||
:param deployment_name: t.b.a.
|
||||
:param system_prompt: Optional. System prompt string.
|
||||
:param where: Optional. A dictionary of key-value pairs to filter the database results.
|
||||
Takes the place of the former `QueryConfig` or `ChatConfig`.
|
||||
Use `LlmConfig` as an alias to `BaseLlmConfig`.
|
||||
|
||||
:param number_documents: Number of documents to pull from the database as
|
||||
context, defaults to 1
|
||||
:type number_documents: int, optional
|
||||
:param template: The `Template` instance to use as a template for
|
||||
prompt, defaults to None
|
||||
:type template: Optional[Template], optional
|
||||
:param model: Controls the OpenAI model used, defaults to None
|
||||
:type model: Optional[str], optional
|
||||
:param temperature: Controls the randomness of the model's output.
|
||||
Higher values (closer to 1) make output more random, lower values make it more deterministic, defaults to 0
|
||||
:type temperature: float, optional
|
||||
:param max_tokens: Controls how many tokens are generated, defaults to 1000
|
||||
:type max_tokens: int, optional
|
||||
:param top_p: Controls the diversity of words. Higher values (closer to 1) make word selection more diverse,
|
||||
defaults to 1
|
||||
:type top_p: float, optional
|
||||
:param stream: Control if response is streamed back to user, defaults to False
|
||||
:type stream: bool, optional
|
||||
:param deployment_name: t.b.a., defaults to None
|
||||
:type deployment_name: Optional[str], optional
|
||||
:param system_prompt: System prompt string, defaults to None
|
||||
:type system_prompt: Optional[str], optional
|
||||
:param where: A dictionary of key-value pairs to filter the database results, defaults to None
|
||||
:type where: Dict[str, Any], optional
|
||||
:raises ValueError: If the template is not valid as template should
|
||||
contain $context and $query (and optionally $history).
|
||||
contain $context and $query (and optionally $history)
|
||||
:raises ValueError: Stream is not boolean
|
||||
"""
|
||||
if number_documents is None:
|
||||
self.number_documents = 1
|
||||
else:
|
||||
self.number_documents = number_documents
|
||||
|
||||
if template is None:
|
||||
template = DEFAULT_PROMPT_TEMPLATE
|
||||
|
||||
self.temperature = temperature if temperature else 0
|
||||
self.max_tokens = max_tokens if max_tokens else 1000
|
||||
self.number_documents = number_documents
|
||||
self.temperature = temperature
|
||||
self.max_tokens = max_tokens
|
||||
self.model = model
|
||||
self.top_p = top_p if top_p else 1
|
||||
self.top_p = top_p
|
||||
self.deployment_name = deployment_name
|
||||
self.system_prompt = system_prompt
|
||||
|
||||
@@ -115,20 +123,24 @@ class BaseLlmConfig(BaseConfig):
|
||||
self.stream = stream
|
||||
self.where = where
|
||||
|
||||
def validate_template(self, template: Template):
|
||||
def validate_template(self, template: Template) -> bool:
|
||||
"""
|
||||
validate the template
|
||||
|
||||
:param template: the template to validate
|
||||
:return: Boolean, valid (true) or invalid (false)
|
||||
:type template: Template
|
||||
:return: valid (true) or invalid (false)
|
||||
:rtype: bool
|
||||
"""
|
||||
return re.search(query_re, template.template) and re.search(context_re, template.template)
|
||||
|
||||
def _validate_template_history(self, template: Template):
|
||||
def _validate_template_history(self, template: Template) -> bool:
|
||||
"""
|
||||
validate the history template for history
|
||||
validate the template with history
|
||||
|
||||
:param template: the template to validate
|
||||
:return: Boolean, valid (true) or invalid (false)
|
||||
:type template: Template
|
||||
:return: valid (true) or invalid (false)
|
||||
:rtype: bool
|
||||
"""
|
||||
return re.search(history_re, template.template)
|
||||
|
||||
@@ -7,11 +7,23 @@ class BaseVectorDbConfig(BaseConfig):
|
||||
def __init__(
|
||||
self,
|
||||
collection_name: Optional[str] = None,
|
||||
dir: Optional[str] = None,
|
||||
dir: str = "db",
|
||||
host: Optional[str] = None,
|
||||
port: Optional[str] = None,
|
||||
):
|
||||
"""
|
||||
Initializes a configuration class instance for the vector database.
|
||||
|
||||
:param collection_name: Default name for the collection, defaults to None
|
||||
:type collection_name: Optional[str], optional
|
||||
:param dir: Path to the database directory, where the database is stored, defaults to "db"
|
||||
:type dir: str, optional
|
||||
:param host: Database connection remote host. Use this if you run Embedchain as a client, defaults to None
|
||||
:type host: Optional[str], optional
|
||||
:param port: Database connection remote port. Use this if you run Embedchain as a client, defaults to None
|
||||
:type port: Optional[str], optional
|
||||
"""
|
||||
self.collection_name = collection_name or "embedchain_store"
|
||||
self.dir = dir or "db"
|
||||
self.dir = dir
|
||||
self.host = host
|
||||
self.port = port
|
||||
|
||||
@@ -14,6 +14,20 @@ class ChromaDbConfig(BaseVectorDbConfig):
|
||||
port: Optional[str] = None,
|
||||
chroma_settings: Optional[dict] = None,
|
||||
):
|
||||
"""
|
||||
Initializes a configuration class instance for ChromaDB.
|
||||
|
||||
:param collection_name: Default name for the collection, defaults to None
|
||||
:type collection_name: Optional[str], optional
|
||||
:param dir: Path to the database directory, where the database is stored, defaults to None
|
||||
:type dir: Optional[str], optional
|
||||
:param host: Database connection remote host. Use this if you run Embedchain as a client, defaults to None
|
||||
:type host: Optional[str], optional
|
||||
:param port: Database connection remote port. Use this if you run Embedchain as a client, defaults to None
|
||||
:type port: Optional[str], optional
|
||||
:param chroma_settings: Chroma settings dict, defaults to None
|
||||
:type chroma_settings: Optional[dict], optional
|
||||
"""
|
||||
"""
|
||||
:param chroma_settings: Optional. Chroma settings for connection.
|
||||
"""
|
||||
|
||||
@@ -14,9 +14,16 @@ class ElasticsearchDBConfig(BaseVectorDbConfig):
|
||||
**ES_EXTRA_PARAMS: Dict[str, any],
|
||||
):
|
||||
"""
|
||||
Config to initialize an elasticsearch client.
|
||||
:param es_url. elasticsearch url or list of nodes url to be used for connection
|
||||
Initializes a configuration class instance for an Elasticsearch client.
|
||||
|
||||
:param collection_name: Default name for the collection, defaults to None
|
||||
:type collection_name: Optional[str], optional
|
||||
:param dir: Path to the database directory, where the database is stored, defaults to None
|
||||
:type dir: Optional[str], optional
|
||||
:param es_url: elasticsearch url or list of nodes url to be used for connection, defaults to None
|
||||
:type es_url: Union[str, List[str]], optional
|
||||
:param ES_EXTRA_PARAMS: extra params dict that can be passed to elasticsearch.
|
||||
:type ES_EXTRA_PARAMS: Dict[str, Any], optional
|
||||
"""
|
||||
# self, es_url: Union[str, List[str]] = None, **ES_EXTRA_PARAMS: Dict[str, any]):
|
||||
self.ES_URL = es_url
|
||||
|
||||
Reference in New Issue
Block a user