refactor: classes and configs (#528)

This commit is contained in:
cachho
2023-09-05 10:12:58 +02:00
committed by GitHub
parent 387b042a49
commit 344e7470f6
50 changed files with 1221 additions and 997 deletions

View File

@@ -1,9 +1,9 @@
import logging
from typing import Optional
from embedchain.config.BaseConfig import BaseConfig
from embedchain.config.vectordbs import ElasticsearchDBConfig
from embedchain.helper_classes.json_serializable import JSONSerializable
from embedchain.models import VectorDatabases, VectorDimensions
from embedchain.vectordb.base_vector_db import BaseVectorDB
class BaseAppConfig(BaseConfig, JSONSerializable):
@@ -14,81 +14,38 @@ class BaseAppConfig(BaseConfig, JSONSerializable):
def __init__(
self,
log_level=None,
embedding_fn=None,
db=None,
host=None,
port=None,
db: Optional[BaseVectorDB] = None,
id=None,
collection_name=None,
collect_metrics: bool = True,
db_type: VectorDatabases = None,
vector_dim: VectorDimensions = None,
es_config: ElasticsearchDBConfig = None,
chroma_settings: dict = {},
collection_name: Optional[str] = None,
):
"""
:param log_level: Optional. (String) Debug level
['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'].
:param embedding_fn: Embedding function to use.
:param db: Optional. (Vector) database instance to use for embeddings.
:param host: Optional. Hostname for the database server.
:param port: Optional. Port for the database server.
:param db: Optional. (Vector) database instance to use for embeddings. Deprecated in favor of app(..., db).
:param id: Optional. ID of the app. Document metadata will have this id.
:param collection_name: Optional. Collection name for the database.
:param collect_metrics: Defaults to True. Send anonymous telemetry to improve embedchain.
:param db_type: Optional. type of Vector database to use
:param vector_dim: Vector dimension generated by embedding fn
:param db_type: Optional. Initializes a default vector database of the given type.
Using the `db` argument is preferred.
:param es_config: Optional. elasticsearch database config to be used for connection
:param chroma_settings: Optional. Chroma settings for connection.
:param collection_name: Optional. Default collection name.
It's recommended to use app.set_collection_name() instead.
"""
self._setup_logging(log_level)
self.collection_name = collection_name if collection_name else "embedchain_store"
self.db = BaseAppConfig.get_db(
db=db,
embedding_fn=embedding_fn,
host=host,
port=port,
db_type=db_type,
vector_dim=vector_dim,
collection_name=self.collection_name,
es_config=es_config,
chroma_settings=chroma_settings,
)
self.id = id
self.collect_metrics = True if (collect_metrics is True or collect_metrics is None) else False
return
self.collection_name = collection_name
@staticmethod
def get_db(db, embedding_fn, host, port, db_type, vector_dim, collection_name, es_config, chroma_settings):
"""
Return the given `db` if one is supplied; otherwise create a database based on `db_type`, defaulting to `ChromaDB`.
:param db: Optional. (Vector) database to use for embeddings.
:param embedding_fn: Embedding function to use in database.
:param host: Optional. Hostname for the database server.
:param port: Optional. Port for the database server.
:param db_type: Optional. db type to use. Supported values (`es`, `chroma`)
:param vector_dim: Vector dimension generated by embedding fn
:param collection_name: Optional. Collection name for the database.
:param es_config: Optional. elasticsearch database config to be used for connection
:raises ValueError: BaseAppConfig knows no default embedding function.
:returns: database instance
"""
if db:
return db
if embedding_fn is None:
raise ValueError("ChromaDb cannot be instantiated without an embedding function")
if db_type == VectorDatabases.ELASTICSEARCH:
from embedchain.vectordb.elasticsearch_db import ElasticsearchDB
return ElasticsearchDB(
embedding_fn=embedding_fn, vector_dim=vector_dim, collection_name=collection_name, es_config=es_config
self._db = db
logging.warning(
"DEPRECATION WARNING: Please supply the database as the second parameter during app init. "
"Such as `app(config=config, db=db)`."
)
from embedchain.vectordb.chroma_db import ChromaDB
return ChromaDB(embedding_fn=embedding_fn, host=host, port=port, chroma_settings=chroma_settings)
if collection_name:
logging.warning("DEPRECATION WARNING: Please supply the collection name to the database config.")
return
def _setup_logging(self, debug_level):
level = logging.WARNING # Default level