[Feat]: Add support for running chromadb in server mode with embedchain (#220)
This commit is contained in:
2
Makefile
2
Makefile
@@ -8,7 +8,7 @@ PROJECT_NAME := embedchain
|
||||
|
||||
install:
|
||||
$(PIP) install --upgrade pip
|
||||
$(PIP) install .[dev]
|
||||
$(PIP) install -e .[dev]
|
||||
|
||||
format:
|
||||
$(PYTHON) -m black .
|
||||
|
||||
@@ -9,7 +9,7 @@ class InitConfig(BaseConfig):
|
||||
Config to initialize an embedchain `App` instance.
|
||||
"""
|
||||
|
||||
def __init__(self, log_level=None, ef=None, db=None):
|
||||
def __init__(self, log_level=None, ef=None, db=None, host=None, port=None):
|
||||
"""
|
||||
:param log_level: Optional. (String) Debug level
|
||||
['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'].
|
||||
@@ -33,7 +33,7 @@ class InitConfig(BaseConfig):
|
||||
if db is None:
|
||||
from embedchain.vectordb.chroma_db import ChromaDB
|
||||
|
||||
self.db = ChromaDB(ef=self.ef)
|
||||
self.db = ChromaDB(ef=self.ef, host=host, port=port)
|
||||
else:
|
||||
self.db = db
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ from embedchain.vectordb.base_vector_db import BaseVectorDB
|
||||
class ChromaDB(BaseVectorDB):
|
||||
"""Vector database using ChromaDB."""
|
||||
|
||||
def __init__(self, db_dir=None, ef=None):
|
||||
def __init__(self, db_dir=None, ef=None, host=None, port=None):
|
||||
if ef:
|
||||
self.ef = ef
|
||||
else:
|
||||
@@ -18,13 +18,21 @@ class ChromaDB(BaseVectorDB):
|
||||
organization_id=os.getenv("OPENAI_ORGANIZATION"),
|
||||
model_name="text-embedding-ada-002",
|
||||
)
|
||||
if db_dir is None:
|
||||
db_dir = "db"
|
||||
self.client_settings = chromadb.config.Settings(
|
||||
chroma_db_impl="duckdb+parquet",
|
||||
persist_directory=db_dir,
|
||||
anonymized_telemetry=False,
|
||||
)
|
||||
|
||||
if host and port:
|
||||
self.client_settings = chromadb.config.Settings(
|
||||
chroma_api_impl="rest",
|
||||
chroma_server_host=host,
|
||||
chroma_server_http_port=port,
|
||||
)
|
||||
else:
|
||||
if db_dir is None:
|
||||
db_dir = "db"
|
||||
self.client_settings = chromadb.config.Settings(
|
||||
chroma_db_impl="duckdb+parquet",
|
||||
persist_directory=db_dir,
|
||||
anonymized_telemetry=False,
|
||||
)
|
||||
super().__init__()
|
||||
|
||||
def _get_or_create_db(self):
|
||||
|
||||
111
notebooks/embedchain-chromadb-server.ipynb
Normal file
111
notebooks/embedchain-chromadb-server.ipynb
Normal file
@@ -0,0 +1,111 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "553f2e71",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Embedchain chromadb server example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "513e12e6",
|
||||
"metadata": {},
|
||||
"source": [
|
|
"This notebook shows an example of how you can use embedchain with chromadb (server). \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"First, run chroma inside docker using the following command:\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"```bash\n",
|
||||
"git clone https://github.com/chroma-core/chroma\n",
|
||||
"cd chroma && docker-compose up -d --build\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "92e7ad71",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from embedchain import App\n",
|
||||
"from embedchain.config import InitConfig\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"chromadb_host = \"localhost\"\n",
|
||||
"chromadb_port = 8000\n",
|
||||
"\n",
|
||||
"config = InitConfig(host=chromadb_host, port=chromadb_port)\n",
|
||||
"elon_bot = App(config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "1a6d6841",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"All data from https://en.wikipedia.org/wiki/Elon_Musk already exists in the database.\n",
|
||||
"All data from https://www.tesla.com/elon-musk already exists in the database.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Embed Online Resources\n",
|
||||
"elon_bot.add(\"web_page\", \"https://en.wikipedia.org/wiki/Elon_Musk\")\n",
|
||||
"elon_bot.add(\"web_page\", \"https://www.tesla.com/elon-musk\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "34cda99c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Elon Musk runs four companies: Tesla, SpaceX, Neuralink, and The Boring Company.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"elon_bot.query(\"How many companies does Elon Musk run?\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user