[Feat]: Add support for running chromadb in server mode with embedchain (#220)

This commit is contained in:
Deshraj Yadav
2023-07-11 03:17:40 -07:00
committed by GitHub
parent 9ca836520f
commit 73dd7151cb
4 changed files with 130 additions and 11 deletions

View File

@@ -8,7 +8,7 @@ PROJECT_NAME := embedchain
install:
$(PIP) install --upgrade pip
$(PIP) install .[dev]
$(PIP) install -e .[dev]
format:
$(PYTHON) -m black .

View File

@@ -9,7 +9,7 @@ class InitConfig(BaseConfig):
Config to initialize an embedchain `App` instance.
"""
def __init__(self, log_level=None, ef=None, db=None):
def __init__(self, log_level=None, ef=None, db=None, host=None, port=None):
"""
:param log_level: Optional. (String) Debug level
['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'].
@@ -33,7 +33,7 @@ class InitConfig(BaseConfig):
if db is None:
from embedchain.vectordb.chroma_db import ChromaDB
self.db = ChromaDB(ef=self.ef)
self.db = ChromaDB(ef=self.ef, host=host, port=port)
else:
self.db = db

View File

@@ -9,7 +9,7 @@ from embedchain.vectordb.base_vector_db import BaseVectorDB
class ChromaDB(BaseVectorDB):
"""Vector database using ChromaDB."""
def __init__(self, db_dir=None, ef=None):
def __init__(self, db_dir=None, ef=None, host=None, port=None):
if ef:
self.ef = ef
else:
@@ -18,13 +18,21 @@ class ChromaDB(BaseVectorDB):
organization_id=os.getenv("OPENAI_ORGANIZATION"),
model_name="text-embedding-ada-002",
)
if db_dir is None:
db_dir = "db"
self.client_settings = chromadb.config.Settings(
chroma_db_impl="duckdb+parquet",
persist_directory=db_dir,
anonymized_telemetry=False,
)
if host and port:
self.client_settings = chromadb.config.Settings(
chroma_api_impl="rest",
chroma_server_host=host,
chroma_server_http_port=port,
)
else:
if db_dir is None:
db_dir = "db"
self.client_settings = chromadb.config.Settings(
chroma_db_impl="duckdb+parquet",
persist_directory=db_dir,
anonymized_telemetry=False,
)
super().__init__()
def _get_or_create_db(self):

View File

@@ -0,0 +1,111 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "553f2e71",
"metadata": {},
"source": [
"## Embedchain chromadb server example"
]
},
{
"cell_type": "markdown",
"id": "513e12e6",
"metadata": {},
"source": [
"This notebook shows an example of how you can use embedchain with chromadb (server). \n",
"\n",
"\n",
"First, run chroma inside docker using the following command:\n",
"\n",
"\n",
"```bash\n",
"git clone https://github.com/chroma-core/chroma\n",
"cd chroma && docker-compose up -d --build\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "92e7ad71",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from embedchain import App\n",
"from embedchain.config import InitConfig\n",
"\n",
"\n",
"chromadb_host = \"localhost\"\n",
"chromadb_port = 8000\n",
"\n",
"config = InitConfig(host=chromadb_host, port=chromadb_port)\n",
"elon_bot = App(config)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "1a6d6841",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"All data from https://en.wikipedia.org/wiki/Elon_Musk already exists in the database.\n",
"All data from https://www.tesla.com/elon-musk already exists in the database.\n"
]
}
],
"source": [
"# Embed Online Resources\n",
"elon_bot.add(\"web_page\", \"https://en.wikipedia.org/wiki/Elon_Musk\")\n",
"elon_bot.add(\"web_page\", \"https://www.tesla.com/elon-musk\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "34cda99c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Elon Musk runs four companies: Tesla, SpaceX, Neuralink, and The Boring Company.'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"elon_bot.query(\"How many companies does Elon Musk run?\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}