[feat] Refactor VectorDB class hierarchy for flexibility

This commit is contained in:
Sayo
2023-06-22 12:15:26 +08:00
parent 973dc5434f
commit 85a6a0c161
3 changed files with 43 additions and 35 deletions

View File

@@ -0,0 +1,10 @@
class BaseVectorDB:
    """Base class for vector database backends.

    Construction runs two hooks, in order, and stores what they return:
    ``_get_or_create_db()`` becomes ``self.client`` and then
    ``_get_or_create_collection()`` becomes ``self.collection``.

    Subclasses override both hooks. Because the hooks run inside this
    constructor, any attribute a hook reads must already be set on ``self``
    before the subclass calls ``super().__init__()``.
    """

    def __init__(self):
        """Populate ``client`` and ``collection`` through the hooks."""
        # The client is assigned first so the collection hook can use it.
        self.client = self._get_or_create_db()
        self.collection = self._get_or_create_collection()

    def _get_or_create_db(self):
        """Return the backend client object; subclasses must override.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        # Name the concrete class so a missing override is easy to locate.
        raise NotImplementedError(
            f"{type(self).__name__} must implement _get_or_create_db()"
        )

    def _get_or_create_collection(self):
        """Return the backend collection object; subclasses must override.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement _get_or_create_collection()"
        )

View File

@@ -0,0 +1,26 @@
import os
import chromadb
from base_vector_db import BaseVectorDB
from chromadb.utils import embedding_functions
# Module-level embedding function, constructed once when this module is
# imported. The API key is looked up in the OPENAI_API_KEY environment
# variable at that moment; os.getenv yields None when it is not set.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    model_name="text-embedding-ada-002",
    api_key=os.getenv("OPENAI_API_KEY"),
)
class ChromaDB(BaseVectorDB):
    """Vector database backend built on a chromadb client.

    The client is configured with the ``duckdb+parquet`` implementation,
    ``db_dir`` as its persist directory, and anonymized telemetry disabled.
    """

    def __init__(self, db_dir):
        """Build the client settings, then let the base class run the hooks.

        Args:
            db_dir: Value passed to chromadb as ``persist_directory``.
        """
        settings = chromadb.config.Settings(
            anonymized_telemetry=False,
            persist_directory=db_dir,
            chroma_db_impl="duckdb+parquet",
        )
        # Must be stored before super().__init__(): _get_or_create_db reads
        # self.client_settings, and the base constructor is expected to
        # invoke that hook during initialisation.
        self.client_settings = settings
        super().__init__()

    def _get_or_create_db(self):
        """Return a chromadb client created from ``self.client_settings``."""
        client = chromadb.Client(self.client_settings)
        return client

    def _get_or_create_collection(self):
        """Return the ``embedchain_store`` collection from ``self.client``.

        The collection is requested with the module-level ``openai_ef``
        embedding function.
        """
        collection = self.client.get_or_create_collection(
            'embedchain_store',
            embedding_function=openai_ef,
        )
        return collection