[Feature] Add support to use any sql database as the metadata storage for embedchain apps (#1273)
This commit is contained in:
@@ -7,4 +7,4 @@ from embedchain.client import Client # noqa: F401
|
||||
from embedchain.pipeline import Pipeline # noqa: F401
|
||||
|
||||
# Set up the user directory if it doesn't exist already
|
||||
Client.setup_dir()
|
||||
Client.setup()
|
||||
|
||||
116
embedchain/alembic.ini
Normal file
116
embedchain/alembic.ini
Normal file
@@ -0,0 +1,116 @@
|
||||
# A generic, single database configuration.
|
||||
|
||||
[alembic]
|
||||
# path to migration scripts
|
||||
script_location = embedchain/migrations
|
||||
|
||||
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
|
||||
# Uncomment the line below if you want the files to be prepended with date and time
|
||||
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
|
||||
# for all available tokens
|
||||
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
|
||||
|
||||
# sys.path path, will be prepended to sys.path if present.
|
||||
# defaults to the current working directory.
|
||||
prepend_sys_path = .
|
||||
|
||||
# timezone to use when rendering the date within the migration file
|
||||
# as well as the filename.
|
||||
# If specified, requires python>=3.9 or the backports.zoneinfo library.
|
||||
# Any required deps can be installed by adding `alembic[tz]` to the pip requirements
|
||||
# string value is passed to ZoneInfo()
|
||||
# leave blank for localtime
|
||||
# timezone =
|
||||
|
||||
# max length of characters to apply to the
|
||||
# "slug" field
|
||||
# truncate_slug_length = 40
|
||||
|
||||
# set to 'true' to run the environment during
|
||||
# the 'revision' command, regardless of autogenerate
|
||||
# revision_environment = false
|
||||
|
||||
# set to 'true' to allow .pyc and .pyo files without
|
||||
# a source .py file to be detected as revisions in the
|
||||
# versions/ directory
|
||||
# sourceless = false
|
||||
|
||||
# version location specification; This defaults
|
||||
# to alembic/versions. When using multiple version
|
||||
# directories, initial revisions must be specified with --version-path.
|
||||
# The path separator used here should be the separator specified by "version_path_separator" below.
|
||||
# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions
|
||||
|
||||
# version path separator; As mentioned above, this is the character used to split
|
||||
# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
|
||||
# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
|
||||
# Valid values for version_path_separator are:
|
||||
#
|
||||
# version_path_separator = :
|
||||
# version_path_separator = ;
|
||||
# version_path_separator = space
|
||||
version_path_separator = os # Use os.pathsep. Default configuration used for new projects.
|
||||
|
||||
# set to 'true' to search source files recursively
|
||||
# in each "version_locations" directory
|
||||
# new in Alembic version 1.10
|
||||
# recursive_version_locations = false
|
||||
|
||||
# the output encoding used when revision files
|
||||
# are written from script.py.mako
|
||||
# output_encoding = utf-8
|
||||
|
||||
sqlalchemy.url = driver://user:pass@localhost/dbname
|
||||
|
||||
|
||||
[post_write_hooks]
|
||||
# post_write_hooks defines scripts or Python functions that are run
|
||||
# on newly generated revision scripts. See the documentation for further
|
||||
# detail and examples
|
||||
|
||||
# format using "black" - use the console_scripts runner, against the "black" entrypoint
|
||||
# hooks = black
|
||||
# black.type = console_scripts
|
||||
# black.entrypoint = black
|
||||
# black.options = -l 79 REVISION_SCRIPT_FILENAME
|
||||
|
||||
# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
|
||||
# hooks = ruff
|
||||
# ruff.type = exec
|
||||
# ruff.executable = %(here)s/.venv/bin/ruff
|
||||
# ruff.options = --fix REVISION_SCRIPT_FILENAME
|
||||
|
||||
# Logging configuration
|
||||
[loggers]
|
||||
keys = root,sqlalchemy,alembic
|
||||
|
||||
[handlers]
|
||||
keys = console
|
||||
|
||||
[formatters]
|
||||
keys = generic
|
||||
|
||||
[logger_root]
|
||||
level = WARN
|
||||
handlers = console
|
||||
qualname =
|
||||
|
||||
[logger_sqlalchemy]
|
||||
level = WARN
|
||||
handlers =
|
||||
qualname = sqlalchemy.engine
|
||||
|
||||
[logger_alembic]
|
||||
level = WARN
|
||||
handlers =
|
||||
qualname = alembic
|
||||
|
||||
[handler_console]
|
||||
class = StreamHandler
|
||||
args = (sys.stderr,)
|
||||
level = NOTSET
|
||||
formatter = generic
|
||||
|
||||
[formatter_generic]
|
||||
format = %(levelname)-5.5s [%(name)s] %(message)s
|
||||
datefmt = %H:%M:%S
|
||||
@@ -3,7 +3,6 @@ import concurrent.futures
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sqlite3
|
||||
import uuid
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
@@ -16,7 +15,8 @@ from embedchain.cache import (Config, ExactMatchEvaluation,
|
||||
gptcache_data_manager, gptcache_pre_function)
|
||||
from embedchain.client import Client
|
||||
from embedchain.config import AppConfig, CacheConfig, ChunkerConfig
|
||||
from embedchain.constants import SQLITE_PATH
|
||||
from embedchain.core.db.database import get_session
|
||||
from embedchain.core.db.models import DataSource
|
||||
from embedchain.embedchain import EmbedChain
|
||||
from embedchain.embedder.base import BaseEmbedder
|
||||
from embedchain.embedder.openai import OpenAIEmbedder
|
||||
@@ -33,9 +33,6 @@ from embedchain.utils.misc import validate_config
|
||||
from embedchain.vectordb.base import BaseVectorDB
|
||||
from embedchain.vectordb.chroma import ChromaDB
|
||||
|
||||
# Set up the user directory if it doesn't exist already
|
||||
Client.setup_dir()
|
||||
|
||||
|
||||
@register_deserializable
|
||||
class App(EmbedChain):
|
||||
@@ -120,6 +117,9 @@ class App(EmbedChain):
|
||||
self.llm = llm or OpenAILlm()
|
||||
self._init_db()
|
||||
|
||||
# Session for the metadata db
|
||||
self.db_session = get_session()
|
||||
|
||||
# If cache_config is provided, initializing the cache ...
|
||||
if self.cache_config is not None:
|
||||
self._init_cache()
|
||||
@@ -127,27 +127,6 @@ class App(EmbedChain):
|
||||
# Send anonymous telemetry
|
||||
self._telemetry_props = {"class": self.__class__.__name__}
|
||||
self.telemetry = AnonymousTelemetry(enabled=self.config.collect_metrics)
|
||||
|
||||
# Establish a connection to the SQLite database
|
||||
self.connection = sqlite3.connect(SQLITE_PATH, check_same_thread=False)
|
||||
self.cursor = self.connection.cursor()
|
||||
|
||||
# Create the 'data_sources' table if it doesn't exist
|
||||
self.cursor.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS data_sources (
|
||||
pipeline_id TEXT,
|
||||
hash TEXT,
|
||||
type TEXT,
|
||||
value TEXT,
|
||||
metadata TEXT,
|
||||
is_uploaded INTEGER DEFAULT 0,
|
||||
PRIMARY KEY (pipeline_id, hash)
|
||||
)
|
||||
"""
|
||||
)
|
||||
self.connection.commit()
|
||||
# Send anonymous telemetry
|
||||
self.telemetry.capture(event_name="init", properties=self._telemetry_props)
|
||||
|
||||
self.user_asks = []
|
||||
@@ -307,20 +286,14 @@ class App(EmbedChain):
|
||||
return False
|
||||
|
||||
def _mark_data_as_uploaded(self, data_hash):
    """Flag this app's data source identified by ``data_hash`` as uploaded.

    :param data_hash: Hash of the data source row to update.
    :raises Exception: Re-raises any database error after rolling the session back.
    """
    try:
        self.db_session.query(DataSource).filter_by(hash=data_hash, app_id=self.local_id).update({"is_uploaded": 1})
        # The sqlite3 code this replaces committed right after the UPDATE;
        # without a commit the change only stays pending in the session.
        self.db_session.commit()
    except Exception:
        self.db_session.rollback()
        raise
|
||||
|
||||
def get_data_sources(self):
    """Return the data sources stored for this app.

    :return: One dict per stored row with the keys ``data_type``,
        ``data_value`` and ``metadata``.
    :rtype: list[dict]
    """
    rows = self.db_session.query(DataSource).filter_by(app_id=self.local_id).all()
    # The DataSource model exposes its columns as `type`, `value` and
    # `meta_data` (mapped to the "metadata" column); `row.metadata` would be
    # the declarative MetaData object rather than the stored text.
    return [
        {"data_type": row.type, "data_value": row.value, "metadata": row.meta_data}
        for row in rows
    ]
|
||||
|
||||
def deploy(self):
|
||||
if self.client is None:
|
||||
@@ -329,14 +302,11 @@ class App(EmbedChain):
|
||||
pipeline_data = self._create_pipeline()
|
||||
self.id = pipeline_data["id"]
|
||||
|
||||
results = self.cursor.execute(
|
||||
"SELECT * FROM data_sources WHERE pipeline_id = ? AND is_uploaded = 0", (self.local_id,) # noqa:E501
|
||||
).fetchall()
|
||||
|
||||
results = self.db_session.query(DataSource).filter_by(app_id=self.local_id, is_uploaded=0).all()
|
||||
if len(results) > 0:
|
||||
print("🛠️ Adding data to your pipeline...")
|
||||
for result in results:
|
||||
data_hash, data_type, data_value = result[1], result[2], result[3]
|
||||
data_hash, data_type, data_value = result.hash, result.data_type, result.data_value
|
||||
self._process_and_upload_data(data_hash, data_type, data_value)
|
||||
|
||||
# Send anonymous telemetry
|
||||
@@ -423,10 +393,6 @@ class App(EmbedChain):
|
||||
else:
|
||||
cache_config = None
|
||||
|
||||
# Send anonymous telemetry
|
||||
event_properties = {"init_type": "config_data"}
|
||||
AnonymousTelemetry().capture(event_name="init", properties=event_properties)
|
||||
|
||||
return cls(
|
||||
config=app_config,
|
||||
llm=llm,
|
||||
|
||||
@@ -5,7 +5,8 @@ import uuid
|
||||
|
||||
import requests
|
||||
|
||||
from embedchain.constants import CONFIG_DIR, CONFIG_FILE
|
||||
from embedchain.constants import CONFIG_DIR, CONFIG_FILE, DB_URI
|
||||
from embedchain.core.db.database import init_db, setup_engine
|
||||
|
||||
|
||||
class Client:
|
||||
@@ -31,7 +32,7 @@ class Client:
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def setup_dir(cls):
|
||||
def setup(cls):
|
||||
"""
|
||||
Loads the user id from the config file if it exists, otherwise generates a new
|
||||
one and saves it to the config file.
|
||||
@@ -40,6 +41,9 @@ class Client:
|
||||
:rtype: str
|
||||
"""
|
||||
os.makedirs(CONFIG_DIR, exist_ok=True)
|
||||
setup_engine(database_uri=DB_URI)
|
||||
init_db()
|
||||
|
||||
if os.path.exists(CONFIG_FILE):
|
||||
with open(CONFIG_FILE, "r") as f:
|
||||
data = json.load(f)
|
||||
@@ -53,7 +57,7 @@ class Client:
|
||||
@classmethod
|
||||
def load_config(cls):
|
||||
if not os.path.exists(CONFIG_FILE):
|
||||
cls.setup_dir()
|
||||
cls.setup()
|
||||
|
||||
with open(CONFIG_FILE, "r") as config_file:
|
||||
return json.load(config_file)
|
||||
|
||||
@@ -6,3 +6,4 @@ HOME_DIR = str(Path.home())
|
||||
CONFIG_DIR = os.path.join(HOME_DIR, ".embedchain")
|
||||
CONFIG_FILE = os.path.join(CONFIG_DIR, "config.json")
|
||||
SQLITE_PATH = os.path.join(CONFIG_DIR, "embedchain.db")
|
||||
DB_URI = f"sqlite:///{SQLITE_PATH}"
|
||||
|
||||
0
embedchain/core/__init__.py
Normal file
0
embedchain/core/__init__.py
Normal file
0
embedchain/core/db/__init__.py
Normal file
0
embedchain/core/db/__init__.py
Normal file
83
embedchain/core/db/database.py
Normal file
83
embedchain/core/db/database.py
Normal file
@@ -0,0 +1,83 @@
|
||||
import os
|
||||
|
||||
from alembic import command
|
||||
from alembic.config import Config
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.engine.base import Engine
|
||||
from sqlalchemy.orm import Session as SQLAlchemySession
|
||||
from sqlalchemy.orm import scoped_session, sessionmaker
|
||||
|
||||
from .models import Base
|
||||
|
||||
|
||||
class DatabaseManager:
    """Holds the SQLAlchemy engine and scoped session factory for the metadata database.

    The engine is not created in the constructor; call ``setup_engine()``
    before ``init_db()``, ``get_session()`` or ``execute_transaction()``.
    """

    def __init__(self, database_uri: str = "sqlite:///embedchain.db", echo: bool = False):
        """
        :param database_uri: SQLAlchemy database URL.
        :param echo: Passed to ``create_engine`` as its ``echo`` flag.
        """
        self.database_uri = database_uri
        self.echo = echo
        # Both remain None until setup_engine() has been called.
        self.engine: "Engine | None" = None
        self._session_factory = None

    def setup_engine(self) -> None:
        """Initializes the database engine and session factory."""
        # `check_same_thread` is an option of the sqlite3 driver only. Other
        # DBAPI drivers reject unknown connect arguments, so it is passed
        # exclusively for SQLite URLs.
        connect_args = {"check_same_thread": False} if self.database_uri.startswith("sqlite") else {}
        self.engine = create_engine(self.database_uri, echo=self.echo, connect_args=connect_args)
        self._session_factory = scoped_session(sessionmaker(bind=self.engine))
        Base.metadata.bind = self.engine

    def init_db(self) -> None:
        """Creates all tables defined in the Base metadata.

        :raises RuntimeError: If ``setup_engine()`` has not been called yet.
        """
        if not self.engine:
            raise RuntimeError("Database engine is not initialized. Call setup_engine() first.")
        Base.metadata.create_all(self.engine)

    def get_session(self) -> SQLAlchemySession:
        """Provides a session for database operations.

        :raises RuntimeError: If ``setup_engine()`` has not been called yet.
        """
        if not self._session_factory:
            raise RuntimeError("Session factory is not initialized. Call setup_engine() first.")
        return self._session_factory()

    def close_session(self) -> None:
        """Closes the current session (no-op when no session factory exists)."""
        if self._session_factory:
            self._session_factory.remove()

    def execute_transaction(self, transaction_block):
        """Executes a block of code within a database transaction.

        ``transaction_block`` is called with the session; the session is
        committed on success, rolled back on any exception (which is then
        re-raised), and closed in every case.
        """
        session = self.get_session()
        try:
            transaction_block(session)
            session.commit()
        except Exception:
            session.rollback()
            # Bare raise keeps the original traceback intact.
            raise
        finally:
            self.close_session()
|
||||
|
||||
|
||||
# Singleton pattern to use throughout the application
|
||||
database_manager = DatabaseManager()
|
||||
|
||||
|
||||
# Convenience functions for backward compatibility and ease of use
|
||||
def setup_engine(database_uri: str = "sqlite:///embedchain.db", echo: bool = False) -> None:
    """Configure the shared ``database_manager`` and build its engine.

    :param database_uri: SQLAlchemy database URL stored on the manager.
    :param echo: Echo flag stored on the manager.
    """
    manager = database_manager
    manager.database_uri, manager.echo = database_uri, echo
    manager.setup_engine()
|
||||
|
||||
|
||||
def alembic_upgrade() -> None:
    """Upgrades the database to the latest version."""
    # This module lives two package levels below the directory that holds
    # alembic.ini and the migrations folder.
    package_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
    alembic_cfg = Config(os.path.join(package_dir, "alembic.ini"))
    # alembic.ini declares a relative `script_location`, which Alembic resolves
    # against the current working directory. Pin it to the packaged migrations
    # folder so the upgrade works no matter where the process was started.
    # '%' is doubled because the value goes through ConfigParser interpolation.
    migrations_dir = os.path.join(package_dir, "migrations")
    alembic_cfg.set_main_option("script_location", migrations_dir.replace("%", "%%"))
    command.upgrade(alembic_cfg, "head")
|
||||
|
||||
|
||||
def init_db() -> None:
    """Bring the metadata database schema up to date via ``alembic_upgrade()``."""
    alembic_upgrade()
|
||||
|
||||
|
||||
def get_session() -> SQLAlchemySession:
    """Return a session obtained from the shared ``database_manager``."""
    session = database_manager.get_session()
    return session
|
||||
|
||||
|
||||
def execute_transaction(transaction_block):
    """Run ``transaction_block`` in a transaction on the shared ``database_manager``.

    :param transaction_block: Callable that receives the session.
    """
    manager = database_manager
    manager.execute_transaction(transaction_block)
|
||||
31
embedchain/core/db/models.py
Normal file
31
embedchain/core/db/models.py
Normal file
@@ -0,0 +1,31 @@
|
||||
import uuid
|
||||
|
||||
from sqlalchemy import TIMESTAMP, Column, Integer, String, Text, func
|
||||
from sqlalchemy.orm import declarative_base
|
||||
|
||||
Base = declarative_base()
|
||||
metadata = Base.metadata
|
||||
|
||||
|
||||
class DataSource(Base):
    """ORM model for a data source added to an app (table ``ec_data_sources``)."""

    __tablename__ = "ec_data_sources"

    # Primary key; a random UUID string is generated when no id is supplied.
    id = Column(String, default=lambda: str(uuid.uuid4()), primary_key=True)
    app_id = Column(Text, index=True)
    hash = Column(Text, index=True)
    type = Column(Text, index=True)
    value = Column(Text)
    # Exposed as `meta_data` because `metadata` is a reserved attribute on
    # declarative classes; the database column is still named "metadata".
    meta_data = Column("metadata", Text)
    is_uploaded = Column(Integer, default=0)
|
||||
|
||||
|
||||
class ChatHistory(Base):
    """ORM model for one question/answer exchange (table ``ec_chat_history``)."""

    __tablename__ = "ec_chat_history"

    # Composite primary key: (app_id, id, session_id).
    app_id = Column(String, primary_key=True)
    id = Column(String, primary_key=True)
    session_id = Column(String, index=True, primary_key=True)
    question = Column(Text)
    answer = Column(Text)
    # Exposed as `meta_data` because `metadata` is a reserved attribute on
    # declarative classes; the database column is still named "metadata".
    meta_data = Column("metadata", Text)
    created_at = Column(TIMESTAMP, index=True, default=func.current_timestamp())
|
||||
@@ -1,7 +1,6 @@
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import sqlite3
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
from dotenv import load_dotenv
|
||||
@@ -13,7 +12,7 @@ from embedchain.cache import (adapt, get_gptcache_session,
|
||||
from embedchain.chunkers.base_chunker import BaseChunker
|
||||
from embedchain.config import AddConfig, BaseLlmConfig, ChunkerConfig
|
||||
from embedchain.config.base_app_config import BaseAppConfig
|
||||
from embedchain.constants import SQLITE_PATH
|
||||
from embedchain.core.db.models import DataSource
|
||||
from embedchain.data_formatter import DataFormatter
|
||||
from embedchain.embedder.base import BaseEmbedder
|
||||
from embedchain.helpers.json_serializable import JSONSerializable
|
||||
@@ -21,7 +20,6 @@ from embedchain.llm.base import BaseLlm
|
||||
from embedchain.loaders.base_loader import BaseLoader
|
||||
from embedchain.models.data_type import (DataType, DirectDataType,
|
||||
IndirectDataType, SpecialDataType)
|
||||
from embedchain.telemetry.posthog import AnonymousTelemetry
|
||||
from embedchain.utils.misc import detect_datatype, is_valid_json_string
|
||||
from embedchain.vectordb.base import BaseVectorDB
|
||||
|
||||
@@ -85,30 +83,6 @@ class EmbedChain(JSONSerializable):
|
||||
self.user_asks = []
|
||||
|
||||
self.chunker: Optional[ChunkerConfig] = None
|
||||
# Send anonymous telemetry
|
||||
self._telemetry_props = {"class": self.__class__.__name__}
|
||||
self.telemetry = AnonymousTelemetry(enabled=self.config.collect_metrics)
|
||||
# Establish a connection to the SQLite database
|
||||
self.connection = sqlite3.connect(SQLITE_PATH, check_same_thread=False)
|
||||
self.cursor = self.connection.cursor()
|
||||
|
||||
# Create the 'data_sources' table if it doesn't exist
|
||||
self.cursor.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS data_sources (
|
||||
pipeline_id TEXT,
|
||||
hash TEXT,
|
||||
type TEXT,
|
||||
value TEXT,
|
||||
metadata TEXT,
|
||||
is_uploaded INTEGER DEFAULT 0,
|
||||
PRIMARY KEY (pipeline_id, hash)
|
||||
)
|
||||
"""
|
||||
)
|
||||
self.connection.commit()
|
||||
# Send anonymous telemetry
|
||||
self.telemetry.capture(event_name="init", properties=self._telemetry_props)
|
||||
|
||||
@property
|
||||
def collect_metrics(self):
|
||||
@@ -204,17 +178,21 @@ class EmbedChain(JSONSerializable):
|
||||
if data_type in {DataType.DOCS_SITE}:
|
||||
self.is_docs_site_instance = True
|
||||
|
||||
# Insert the data into the 'data' table
|
||||
self.cursor.execute(
|
||||
"""
|
||||
INSERT OR REPLACE INTO data_sources (hash, pipeline_id, type, value, metadata)
|
||||
VALUES (?, ?, ?, ?, ?)
|
||||
""",
|
||||
(source_hash, self.config.id, data_type.value, str(source), json.dumps(metadata)),
|
||||
# Insert the data into the 'ec_data_sources' table
|
||||
self.db_session.add(
|
||||
DataSource(
|
||||
hash=source_hash,
|
||||
app_id=self.config.id,
|
||||
type=data_type.value,
|
||||
value=source,
|
||||
metadata=json.dumps(metadata),
|
||||
)
|
||||
)
|
||||
|
||||
# Commit the transaction
|
||||
self.connection.commit()
|
||||
try:
|
||||
self.db_session.commit()
|
||||
except Exception as e:
|
||||
logging.error(f"Error adding data source: {e}")
|
||||
self.db_session.rollback()
|
||||
|
||||
if dry_run:
|
||||
data_chunks_info = {"chunks": documents, "metadata": metadatas, "count": len(documents), "type": data_type}
|
||||
@@ -666,9 +644,14 @@ class EmbedChain(JSONSerializable):
|
||||
Resets the database. Deletes all embeddings irreversibly.
|
||||
`App` does not have to be reinitialized after using this method.
|
||||
"""
|
||||
try:
|
||||
self.db_session.query(DataSource).filter_by(app_id=self.config.id).delete()
|
||||
self.db_session.commit()
|
||||
except Exception as e:
|
||||
logging.error(f"Error deleting chat history: {e}")
|
||||
self.db_session.rollback()
|
||||
return None
|
||||
self.db.reset()
|
||||
self.cursor.execute("DELETE FROM data_sources WHERE pipeline_id = ?", (self.config.id,))
|
||||
self.connection.commit()
|
||||
self.delete_all_chat_history(app_id=self.config.id)
|
||||
# Send anonymous telemetry
|
||||
self.telemetry.capture(event_name="reset", properties=self._telemetry_props)
|
||||
|
||||
@@ -1,55 +1,40 @@
|
||||
import json
|
||||
import logging
|
||||
import sqlite3
|
||||
import uuid
|
||||
from typing import Any, Optional
|
||||
|
||||
from embedchain.constants import SQLITE_PATH
|
||||
from embedchain.core.db.database import get_session
|
||||
from embedchain.core.db.models import ChatHistory as ChatHistoryModel
|
||||
from embedchain.memory.message import ChatMessage
|
||||
from embedchain.memory.utils import merge_metadata_dict
|
||||
|
||||
CHAT_MESSAGE_CREATE_TABLE_QUERY = """
|
||||
CREATE TABLE IF NOT EXISTS ec_chat_history (
|
||||
app_id TEXT,
|
||||
id TEXT,
|
||||
session_id TEXT,
|
||||
question TEXT,
|
||||
answer TEXT,
|
||||
metadata TEXT,
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (id, app_id, session_id)
|
||||
)
|
||||
"""
|
||||
|
||||
|
||||
class ChatHistory:
|
||||
def __init__(self) -> None:
|
||||
with sqlite3.connect(SQLITE_PATH, check_same_thread=False) as self.connection:
|
||||
self.cursor = self.connection.cursor()
|
||||
self.cursor.execute(CHAT_MESSAGE_CREATE_TABLE_QUERY)
|
||||
self.connection.commit()
|
||||
self.db_session = get_session()
|
||||
|
||||
def add(self, app_id, session_id, chat_message: ChatMessage) -> Optional[str]:
    """Persist one question/answer exchange in the chat history table.

    :param app_id: Id of the app the exchange belongs to.
    :param session_id: Id of the chat session the exchange belongs to.
    :param chat_message: Message pair whose human/AI content and merged
        metadata are stored.
    :return: The generated id of the stored row, or None when the commit fails.
    """
    memory_id = str(uuid.uuid4())
    metadata_dict = merge_metadata_dict(chat_message.human_message.metadata, chat_message.ai_message.metadata)
    # An empty or missing metadata dict is stored as an empty JSON object.
    metadata = self._serialize_json(metadata_dict) if metadata_dict else "{}"

    self.db_session.add(
        ChatHistoryModel(
            app_id=app_id,
            id=memory_id,
            session_id=session_id,
            question=chat_message.human_message.content,
            answer=chat_message.ai_message.content,
            # The model maps the "metadata" column to the attribute
            # `meta_data`; `metadata=` is not an accepted constructor keyword.
            meta_data=metadata,
        )
    )
    try:
        self.db_session.commit()
    except Exception as e:
        logging.error(f"Error adding chat memory to db: {e}")
        self.db_session.rollback()
        return None

    logging.info(f"Added chat memory to db with id: {memory_id}")
    return memory_id
|
||||
|
||||
@@ -63,15 +48,15 @@ class ChatHistory:
|
||||
|
||||
:return: None
|
||||
"""
|
||||
params = {"app_id": app_id}
|
||||
if session_id:
|
||||
DELETE_CHAT_HISTORY_QUERY = "DELETE FROM ec_chat_history WHERE app_id=? AND session_id=?"
|
||||
params = (app_id, session_id)
|
||||
else:
|
||||
DELETE_CHAT_HISTORY_QUERY = "DELETE FROM ec_chat_history WHERE app_id=?"
|
||||
params = (app_id,)
|
||||
|
||||
self.cursor.execute(DELETE_CHAT_HISTORY_QUERY, params)
|
||||
self.connection.commit()
|
||||
params["session_id"] = session_id
|
||||
self.db_session.query(ChatHistoryModel).filter_by(**params).delete()
|
||||
try:
|
||||
self.db_session.commit()
|
||||
except Exception as e:
|
||||
logging.error(f"Error deleting chat history: {e}")
|
||||
self.db_session.rollback()
|
||||
|
||||
def get(
|
||||
self, app_id, session_id: str = "default", num_rounds=10, fetch_all: bool = False, display_format=False
|
||||
@@ -85,50 +70,31 @@ class ChatHistory:
|
||||
param: fetch_all (optional) - Whether to fetch all chat history or not. Defaults to False
|
||||
param: display_format (optional) - Whether to return the chat history in display format. Defaults to False
|
||||
"""
|
||||
|
||||
base_query = """
|
||||
SELECT * FROM ec_chat_history
|
||||
WHERE app_id=?
|
||||
"""
|
||||
|
||||
if fetch_all:
|
||||
additional_query = "ORDER BY created_at ASC"
|
||||
params = (app_id,)
|
||||
else:
|
||||
additional_query = """
|
||||
AND session_id=?
|
||||
ORDER BY created_at ASC
|
||||
LIMIT ?
|
||||
"""
|
||||
params = (app_id, session_id, num_rounds)
|
||||
|
||||
QUERY = base_query + additional_query
|
||||
|
||||
self.cursor.execute(
|
||||
QUERY,
|
||||
params,
|
||||
params = {"app_id": app_id}
|
||||
if not fetch_all:
|
||||
params["session_id"] = session_id
|
||||
results = (
|
||||
self.db_session.query(ChatHistoryModel).filter_by(**params).order_by(ChatHistoryModel.created_at.asc())
|
||||
)
|
||||
|
||||
results = self.cursor.fetchall()
|
||||
results = results.limit(num_rounds) if not fetch_all else results
|
||||
history = []
|
||||
for result in results:
|
||||
app_id, _, session_id, question, answer, metadata, timestamp = result
|
||||
metadata = self._deserialize_json(metadata=metadata)
|
||||
metadata = self._deserialize_json(metadata=result.meta_data or "{}")
|
||||
# Return list of dict if display_format is True
|
||||
if display_format:
|
||||
history.append(
|
||||
{
|
||||
"session_id": session_id,
|
||||
"human": question,
|
||||
"ai": answer,
|
||||
"metadata": metadata,
|
||||
"timestamp": timestamp,
|
||||
"session_id": result.session_id,
|
||||
"human": result.question,
|
||||
"ai": result.answer,
|
||||
"metadata": result.meta_data,
|
||||
"timestamp": result.created_at,
|
||||
}
|
||||
)
|
||||
else:
|
||||
memory = ChatMessage()
|
||||
memory.add_user_message(question, metadata=metadata)
|
||||
memory.add_ai_message(answer, metadata=metadata)
|
||||
memory.add_user_message(result.question, metadata=metadata)
|
||||
memory.add_ai_message(result.answer, metadata=metadata)
|
||||
history.append(memory)
|
||||
return history
|
||||
|
||||
@@ -141,16 +107,11 @@ class ChatHistory:
|
||||
|
||||
:return: The number of chat messages for a given app_id and session_id
|
||||
"""
|
||||
# Rewrite the logic below with sqlalchemy
|
||||
params = {"app_id": app_id}
|
||||
if session_id:
|
||||
QUERY = "SELECT COUNT(*) FROM ec_chat_history WHERE app_id=? AND session_id=?"
|
||||
params = (app_id, session_id)
|
||||
else:
|
||||
QUERY = "SELECT COUNT(*) FROM ec_chat_history WHERE app_id=?"
|
||||
params = (app_id,)
|
||||
|
||||
self.cursor.execute(QUERY, params)
|
||||
count = self.cursor.fetchone()[0]
|
||||
return count
|
||||
params["session_id"] = session_id
|
||||
return self.db_session.query(ChatHistoryModel).filter_by(**params).count()
|
||||
|
||||
@staticmethod
|
||||
def _serialize_json(metadata: dict[str, Any]):
|
||||
|
||||
74
embedchain/migrations/env.py
Normal file
74
embedchain/migrations/env.py
Normal file
@@ -0,0 +1,74 @@
|
||||
from logging.config import fileConfig
|
||||
|
||||
from alembic import context
|
||||
from sqlalchemy import engine_from_config, pool
|
||||
|
||||
from embedchain.constants import DB_URI
|
||||
from embedchain.core.db.models import Base
|
||||
|
||||
# this is the Alembic Config object, which provides
|
||||
# access to the values within the .ini file in use.
|
||||
config = context.config
|
||||
|
||||
# Interpret the config file for Python logging.
|
||||
# This line sets up loggers basically.
|
||||
if config.config_file_name is not None:
|
||||
fileConfig(config.config_file_name)
|
||||
|
||||
target_metadata = Base.metadata
|
||||
|
||||
# other values from the config, defined by the needs of env.py,
|
||||
# can be acquired:
|
||||
# my_important_option = config.get_main_option("my_important_option")
|
||||
# ... etc.
|
||||
config.set_main_option("sqlalchemy.url", DB_URI)
|
||||
|
||||
|
||||
def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    The context is configured with only the database URL taken from the
    Alembic config, so no Engine is created and no DBAPI has to be
    available. Calls to context.execute() emit the given string to the
    script output.
    """
    context.configure(
        url=config.get_main_option("sqlalchemy.url"),
        target_metadata=target_metadata,
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
    )
    with context.begin_transaction():
        context.run_migrations()
|
||||
|
||||
|
||||
def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    Builds an Engine from the ``sqlalchemy.``-prefixed options of the config's
    main section, opens a connection and runs the migrations on it inside a
    transaction.
    """
    ini_section = config.get_section(config.config_ini_section, {})
    engine = engine_from_config(ini_section, prefix="sqlalchemy.", poolclass=pool.NullPool)

    with engine.connect() as connection:
        context.configure(connection=connection, target_metadata=target_metadata)
        with context.begin_transaction():
            context.run_migrations()
|
||||
|
||||
|
||||
if context.is_offline_mode():
|
||||
run_migrations_offline()
|
||||
else:
|
||||
run_migrations_online()
|
||||
26
embedchain/migrations/script.py.mako
Normal file
26
embedchain/migrations/script.py.mako
Normal file
@@ -0,0 +1,26 @@
|
||||
"""${message}
|
||||
|
||||
Revision ID: ${up_revision}
|
||||
Revises: ${down_revision | comma,n}
|
||||
Create Date: ${create_date}
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
${imports if imports else ""}
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = ${repr(up_revision)}
|
||||
down_revision: Union[str, None] = ${repr(down_revision)}
|
||||
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
|
||||
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
${upgrades if upgrades else "pass"}
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
${downgrades if downgrades else "pass"}
|
||||
@@ -0,0 +1,62 @@
|
||||
"""Create initial migrations
|
||||
|
||||
Revision ID: 40a327b3debd
|
||||
Revises:
|
||||
Create Date: 2024-02-18 15:29:19.409064
|
||||
|
||||
"""
|
||||
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = "40a327b3debd"
|
||||
down_revision: Union[str, None] = None
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Create the ``ec_chat_history`` and ``ec_data_sources`` tables and their indexes.

    Releases before this migration created ``ec_chat_history`` directly with
    ``CREATE TABLE IF NOT EXISTS`` in the same SQLite file, so the table may
    already exist when this revision first runs. Tables and indexes that are
    already present are therefore skipped instead of failing the upgrade.
    """
    from alembic import context

    # Offline (--sql) mode has no live connection to inspect: emit everything.
    if context.is_offline_mode():
        inspector = None
        existing_tables = set()
    else:
        inspector = sa.inspect(op.get_bind())
        existing_tables = set(inspector.get_table_names())

    def create_missing_indexes(table_name, columns):
        """Create ``ix_<table>_<column>`` for each column unless it already exists."""
        present = set()
        if inspector is not None and table_name in existing_tables:
            present = {index["name"] for index in inspector.get_indexes(table_name)}
        for column in columns:
            index_name = f"ix_{table_name}_{column}"
            if index_name not in present:
                op.create_index(op.f(index_name), table_name, [column], unique=False)

    if "ec_chat_history" not in existing_tables:
        op.create_table(
            "ec_chat_history",
            sa.Column("app_id", sa.String(), nullable=False),
            sa.Column("id", sa.String(), nullable=False),
            sa.Column("session_id", sa.String(), nullable=False),
            sa.Column("question", sa.Text(), nullable=True),
            sa.Column("answer", sa.Text(), nullable=True),
            sa.Column("metadata", sa.Text(), nullable=True),
            sa.Column("created_at", sa.TIMESTAMP(), nullable=True),
            sa.PrimaryKeyConstraint("app_id", "id", "session_id"),
        )
    create_missing_indexes("ec_chat_history", ["created_at", "session_id"])

    if "ec_data_sources" not in existing_tables:
        op.create_table(
            "ec_data_sources",
            sa.Column("id", sa.String(), nullable=False),
            sa.Column("app_id", sa.Text(), nullable=True),
            sa.Column("hash", sa.Text(), nullable=True),
            sa.Column("type", sa.Text(), nullable=True),
            sa.Column("value", sa.Text(), nullable=True),
            sa.Column("metadata", sa.Text(), nullable=True),
            sa.Column("is_uploaded", sa.Integer(), nullable=True),
            sa.PrimaryKeyConstraint("id"),
        )
    create_missing_indexes("ec_data_sources", ["hash", "app_id", "type"])
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop both tables of this revision, removing each table's indexes first."""
    # Order matters: it mirrors the creation order in reverse.
    teardown_plan = (
        ("ec_data_sources", ("type", "app_id", "hash")),
        ("ec_chat_history", ("session_id", "created_at")),
    )
    for table_name, indexed_columns in teardown_plan:
        for column in indexed_columns:
            op.drop_index(op.f(f"ix_{table_name}_{column}"), table_name=table_name)
        op.drop_table(table_name)
|
||||
@@ -20,7 +20,7 @@ from embedchain.utils.misc import detect_datatype
|
||||
logging.basicConfig(level=logging.WARN)
|
||||
|
||||
# Set up the user directory if it doesn't exist already
|
||||
Client.setup_dir()
|
||||
Client.setup()
|
||||
|
||||
|
||||
class OpenAIAssistant:
|
||||
|
||||
Reference in New Issue
Block a user