diff --git a/Makefile b/Makefile index b9e1dbe0..f0a50090 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ PROJECT_NAME := embedchain install: $(PIP) install --upgrade pip - $(PIP) install .[dev] + $(PIP) install -e .[dev] format: $(PYTHON) -m black . diff --git a/embedchain/config/InitConfig.py b/embedchain/config/InitConfig.py index 90b562d3..f0614956 100644 --- a/embedchain/config/InitConfig.py +++ b/embedchain/config/InitConfig.py @@ -9,7 +9,7 @@ class InitConfig(BaseConfig): Config to initialize an embedchain `App` instance. """ - def __init__(self, log_level=None, ef=None, db=None): + def __init__(self, log_level=None, ef=None, db=None, host=None, port=None): """ :param log_level: Optional. (String) Debug level ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']. @@ -33,7 +33,7 @@ class InitConfig(BaseConfig): if db is None: from embedchain.vectordb.chroma_db import ChromaDB - self.db = ChromaDB(ef=self.ef) + self.db = ChromaDB(ef=self.ef, host=host, port=port) else: self.db = db diff --git a/embedchain/vectordb/chroma_db.py b/embedchain/vectordb/chroma_db.py index de4887e1..40fc6b64 100644 --- a/embedchain/vectordb/chroma_db.py +++ b/embedchain/vectordb/chroma_db.py @@ -9,7 +9,7 @@ from embedchain.vectordb.base_vector_db import BaseVectorDB class ChromaDB(BaseVectorDB): """Vector database using ChromaDB.""" - def __init__(self, db_dir=None, ef=None): + def __init__(self, db_dir=None, ef=None, host=None, port=None): if ef: self.ef = ef else: @@ -18,13 +18,21 @@ class ChromaDB(BaseVectorDB): organization_id=os.getenv("OPENAI_ORGANIZATION"), model_name="text-embedding-ada-002", ) - if db_dir is None: - db_dir = "db" - self.client_settings = chromadb.config.Settings( - chroma_db_impl="duckdb+parquet", - persist_directory=db_dir, - anonymized_telemetry=False, - ) + + if host and port: + self.client_settings = chromadb.config.Settings( + chroma_api_impl="rest", + chroma_server_host=host, + chroma_server_http_port=port, + ) + else: + if db_dir is None: + db_dir = 
"db" + self.client_settings = chromadb.config.Settings( + chroma_db_impl="duckdb+parquet", + persist_directory=db_dir, + anonymized_telemetry=False, + ) super().__init__() def _get_or_create_db(self): diff --git a/notebooks/embedchain-chromadb-server.ipynb b/notebooks/embedchain-chromadb-server.ipynb new file mode 100644 index 00000000..a5adde38 --- /dev/null +++ b/notebooks/embedchain-chromadb-server.ipynb @@ -0,0 +1,111 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "553f2e71", + "metadata": {}, + "source": [ + "## Embedchain chromadb server example" + ] + }, + { + "cell_type": "markdown", + "id": "513e12e6", + "metadata": {}, + "source": [ + "This notebook shows an example of how you can use embedchain with chromadb (server). \n", + "\n", + "\n", + "First, run chroma inside docker using the following command:\n", + "\n", + "\n", + "```bash\n", + "git clone https://github.com/chroma-core/chroma\n", + "cd chroma && docker-compose up -d --build\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "92e7ad71", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from embedchain import App\n", + "from embedchain.config import InitConfig\n", + "\n", + "\n", + "chromadb_host = \"localhost\"\n", + "chromadb_port = 8000\n", + "\n", + "config = InitConfig(host=chromadb_host, port=chromadb_port)\n", + "elon_bot = App(config)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "1a6d6841", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "All data from https://en.wikipedia.org/wiki/Elon_Musk already exists in the database.\n", + "All data from https://www.tesla.com/elon-musk already exists in the database.\n" + ] + } + ], + "source": [ + "# Embed Online Resources\n", + "elon_bot.add(\"web_page\", \"https://en.wikipedia.org/wiki/Elon_Musk\")\n", + "elon_bot.add(\"web_page\", \"https://www.tesla.com/elon-musk\")" + ] + }, + { + "cell_type": "code", + 
"execution_count": 3, + "id": "34cda99c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Elon Musk runs four companies: Tesla, SpaceX, Neuralink, and The Boring Company.'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "elon_bot.query(\"How many companies does Elon Musk run?\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}