[Feature/Improvements] Delete data sources from metadata db when using app.delete() (#1286)
This commit is contained in:
@@ -16,6 +16,7 @@ class PineconeDBConfig(BaseVectorDbConfig):
|
|||||||
pod_config: Optional[dict[str, any]] = None,
|
pod_config: Optional[dict[str, any]] = None,
|
||||||
serverless_config: Optional[dict[str, any]] = None,
|
serverless_config: Optional[dict[str, any]] = None,
|
||||||
hybrid_search: bool = False,
|
hybrid_search: bool = False,
|
||||||
|
bm25_encoder: any = None,
|
||||||
**extra_params: dict[str, any],
|
**extra_params: dict[str, any],
|
||||||
):
|
):
|
||||||
self.metric = metric
|
self.metric = metric
|
||||||
@@ -24,6 +25,7 @@ class PineconeDBConfig(BaseVectorDbConfig):
|
|||||||
self.vector_dimension = vector_dimension
|
self.vector_dimension = vector_dimension
|
||||||
self.extra_params = extra_params
|
self.extra_params = extra_params
|
||||||
self.hybrid_search = hybrid_search
|
self.hybrid_search = hybrid_search
|
||||||
|
self.bm25_encoder = bm25_encoder
|
||||||
if pod_config is None and serverless_config is None:
|
if pod_config is None and serverless_config is None:
|
||||||
# If no config is provided, use the default pod spec config
|
# If no config is provided, use the default pod spec config
|
||||||
pod_environment = os.environ.get("PINECONE_ENV", "gcp-starter")
|
pod_environment = os.environ.get("PINECONE_ENV", "gcp-starter")
|
||||||
|
|||||||
@@ -6,17 +6,20 @@ from typing import Any, Optional, Union
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from langchain.docstore.document import Document
|
from langchain.docstore.document import Document
|
||||||
|
|
||||||
from embedchain.cache import adapt, get_gptcache_session, gptcache_data_convert, gptcache_update_cache_callback
|
from embedchain.cache import (adapt, get_gptcache_session,
|
||||||
|
gptcache_data_convert,
|
||||||
|
gptcache_update_cache_callback)
|
||||||
from embedchain.chunkers.base_chunker import BaseChunker
|
from embedchain.chunkers.base_chunker import BaseChunker
|
||||||
from embedchain.config import AddConfig, BaseLlmConfig, ChunkerConfig
|
from embedchain.config import AddConfig, BaseLlmConfig, ChunkerConfig
|
||||||
from embedchain.config.base_app_config import BaseAppConfig
|
from embedchain.config.base_app_config import BaseAppConfig
|
||||||
from embedchain.core.db.models import DataSource
|
from embedchain.core.db.models import ChatHistory, DataSource
|
||||||
from embedchain.data_formatter import DataFormatter
|
from embedchain.data_formatter import DataFormatter
|
||||||
from embedchain.embedder.base import BaseEmbedder
|
from embedchain.embedder.base import BaseEmbedder
|
||||||
from embedchain.helpers.json_serializable import JSONSerializable
|
from embedchain.helpers.json_serializable import JSONSerializable
|
||||||
from embedchain.llm.base import BaseLlm
|
from embedchain.llm.base import BaseLlm
|
||||||
from embedchain.loaders.base_loader import BaseLoader
|
from embedchain.loaders.base_loader import BaseLoader
|
||||||
from embedchain.models.data_type import DataType, DirectDataType, IndirectDataType, SpecialDataType
|
from embedchain.models.data_type import (DataType, DirectDataType,
|
||||||
|
IndirectDataType, SpecialDataType)
|
||||||
from embedchain.utils.misc import detect_datatype, is_valid_json_string
|
from embedchain.utils.misc import detect_datatype, is_valid_json_string
|
||||||
from embedchain.vectordb.base import BaseVectorDB
|
from embedchain.vectordb.base import BaseVectorDB
|
||||||
|
|
||||||
@@ -642,9 +645,10 @@ class EmbedChain(JSONSerializable):
|
|||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
self.db_session.query(DataSource).filter_by(app_id=self.config.id).delete()
|
self.db_session.query(DataSource).filter_by(app_id=self.config.id).delete()
|
||||||
|
self.db_session.query(ChatHistory).filter_by(app_id=self.config.id).delete()
|
||||||
self.db_session.commit()
|
self.db_session.commit()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Error deleting chat history: {e}")
|
logging.error(f"Error deleting data sources: {e}")
|
||||||
self.db_session.rollback()
|
self.db_session.rollback()
|
||||||
return None
|
return None
|
||||||
self.db.reset()
|
self.db.reset()
|
||||||
@@ -682,6 +686,13 @@ class EmbedChain(JSONSerializable):
|
|||||||
:param source_id: The hash of the source.
|
:param source_id: The hash of the source.
|
||||||
:type source_id: str
|
:type source_id: str
|
||||||
"""
|
"""
|
||||||
|
try:
|
||||||
|
self.db_session.query(DataSource).filter_by(hash=source_id, app_id=self.config.id).delete()
|
||||||
|
self.db_session.commit()
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Error deleting data sources: {e}")
|
||||||
|
self.db_session.rollback()
|
||||||
|
return None
|
||||||
self.db.delete(where={"hash": source_id})
|
self.db.delete(where={"hash": source_id})
|
||||||
logging.info(f"Successfully deleted {source_id}")
|
logging.info(f"Successfully deleted {source_id}")
|
||||||
# Send anonymous telemetry
|
# Send anonymous telemetry
|
||||||
|
|||||||
@@ -49,9 +49,8 @@ class PineconeDB(BaseVectorDB):
|
|||||||
# Setup BM25Encoder if sparse vectors are to be used
|
# Setup BM25Encoder if sparse vectors are to be used
|
||||||
self.bm25_encoder = None
|
self.bm25_encoder = None
|
||||||
if self.config.hybrid_search:
|
if self.config.hybrid_search:
|
||||||
# TODO: Add support for fitting BM25Encoder on any corpus
|
|
||||||
logging.info("Initializing BM25Encoder for sparse vectors..")
|
logging.info("Initializing BM25Encoder for sparse vectors..")
|
||||||
self.bm25_encoder = BM25Encoder.default()
|
self.bm25_encoder = self.config.bm25_encoder if self.config.bm25_encoder else BM25Encoder.default()
|
||||||
|
|
||||||
# Call parent init here because embedder is needed
|
# Call parent init here because embedder is needed
|
||||||
super().__init__(config=self.config)
|
super().__init__(config=self.config)
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "embedchain"
|
name = "embedchain"
|
||||||
version = "0.1.86"
|
version = "0.1.87"
|
||||||
description = "Simplest open source retrieval(RAG) framework"
|
description = "Simplest open source retrieval(RAG) framework"
|
||||||
authors = [
|
authors = [
|
||||||
"Taranjeet Singh <taranjeet@embedchain.ai>",
|
"Taranjeet Singh <taranjeet@embedchain.ai>",
|
||||||
|
|||||||
Reference in New Issue
Block a user