From 6df63d9ca717a53a73c9b7c4bc36b9f606584edb Mon Sep 17 00:00:00 2001 From: Sidharth Mohanty Date: Fri, 29 Dec 2023 21:57:46 +0530 Subject: [PATCH] Update notebooks to use dict instead of yaml and remove dataloaders (#1075) --- embedchain/pipeline.py | 2 +- notebooks/anthropic.ipynb | 50 ++++++----------------- notebooks/azure-openai.ipynb | 2 +- notebooks/chromadb.ipynb | 52 ++++++------------------ notebooks/cohere.ipynb | 52 +++++++----------------- notebooks/elasticsearch.ipynb | 50 ++++++----------------- notebooks/gpt4all.ipynb | 65 +++++++++++------------------- notebooks/hugging_face_hub.ipynb | 65 +++++++++++------------------- notebooks/jina.ipynb | 50 ++++++----------------- notebooks/llama2.ipynb | 52 +++++++----------------- notebooks/ollama.ipynb | 61 ++++++++++------------------ notebooks/openai.ipynb | 68 +++++++++++--------------------- notebooks/opensearch.ipynb | 56 +++++++------------------- notebooks/pinecone.ipynb | 48 ++++++---------------- notebooks/together.ipynb | 48 ++++++---------------- notebooks/vertex_ai.ipynb | 63 ++++++++++------------------- 16 files changed, 230 insertions(+), 554 deletions(-) diff --git a/embedchain/pipeline.py b/embedchain/pipeline.py index 65e3e250..6f70bfb5 100644 --- a/embedchain/pipeline.py +++ b/embedchain/pipeline.py @@ -6,4 +6,4 @@ class Pipeline(App): This is deprecated. Use `App` instead. 
""" - pass \ No newline at end of file + pass diff --git a/notebooks/anthropic.ipynb b/notebooks/anthropic.ipynb index 436ea293..2264d50f 100644 --- a/notebooks/anthropic.ipynb +++ b/notebooks/anthropic.ipynb @@ -31,7 +31,7 @@ }, "outputs": [], "source": [ - "!pip install embedchain[dataloaders]" + "!pip install embedchain" ] }, { @@ -60,45 +60,13 @@ "os.environ[\"ANTHROPIC_API_KEY\"] = \"xxx\"" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ns6RhPfbiitr" - }, - "source": [ - "### Step-3: Define your llm and embedding model config" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "S9CkxVjriotB" - }, - "outputs": [], - "source": [ - "config = \"\"\"\n", - "llm:\n", - " provider: anthropic\n", - " config:\n", - " model: 'claude-instant-1'\n", - " temperature: 0.5\n", - " top_p: 1\n", - " stream: false\n", - "\"\"\"\n", - "\n", - "# Write the multi-line string to a YAML file\n", - "with open('anthropic.yaml', 'w') as file:\n", - " file.write(config)" - ] - }, { "cell_type": "markdown", "metadata": { "id": "PGt6uPLIi1CS" }, "source": [ - "### Step-4 Create embedchain app based on the config" + "### Step-3: Create embedchain app and define your config" ] }, { @@ -109,7 +77,15 @@ }, "outputs": [], "source": [ - "app = App.from_config(config_path=\"anthropic.yaml\")" + "app = App.from_config(config={\"llm\": {\n", + " \"provider\": \"anthropic\",\n", + " \"config\": {\n", + " \"model\": \"claude-instant-1\",\n", + " \"temperature\": 0.5,\n", + " \"top_p\": 1,\n", + " \"stream\": False\n", + " }\n", + "}})" ] }, { @@ -118,7 +94,7 @@ "id": "XNXv4yZwi7ef" }, "source": [ - "### Step-5: Add data sources to your app" + "### Step-4: Add data sources to your app" ] }, { @@ -143,7 +119,7 @@ "id": "_7W6fDeAjMAP" }, "source": [ - "### Step-6: All set. Now start asking questions related to your data" + "### Step-5: All set. 
Now start asking questions related to your data" ] }, { diff --git a/notebooks/azure-openai.ipynb b/notebooks/azure-openai.ipynb index 3caa5bb9..6d9d1a93 100644 --- a/notebooks/azure-openai.ipynb +++ b/notebooks/azure-openai.ipynb @@ -23,7 +23,7 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install embedchain[dataloaders]" + "!pip install embedchain" ] }, { diff --git a/notebooks/chromadb.ipynb b/notebooks/chromadb.ipynb index e3b8ce4b..a9f12e94 100644 --- a/notebooks/chromadb.ipynb +++ b/notebooks/chromadb.ipynb @@ -26,7 +26,7 @@ }, "outputs": [], "source": [ - "!pip install embedchain[dataloaders]" + "!pip install embedchain" ] }, { @@ -54,47 +54,13 @@ "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ns6RhPfbiitr" - }, - "source": [ - "### Step-3: Define your Vector Database config" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "S9CkxVjriotB" - }, - "outputs": [], - "source": [ - "config = \"\"\"\n", - "vectordb:\n", - " provider: chroma\n", - " config:\n", - " collection_name: 'my-collection'\n", - " # CHANGE THE BELOW TWO LINES!\n", - " # pass remote database variables - host and port\n", - " host: your-chromadb-url.com\n", - " port: 5200\n", - " allow_reset: true\n", - "\"\"\"\n", - "\n", - "# Write the multi-line string to a YAML file\n", - "with open('chromadb.yaml', 'w') as file:\n", - " file.write(config)" - ] - }, { "cell_type": "markdown", "metadata": { "id": "PGt6uPLIi1CS" }, "source": [ - "### Step-4 Create embedchain app based on the config" + "### Step-3 Create embedchain app and define your config" ] }, { @@ -105,7 +71,15 @@ }, "outputs": [], "source": [ - "app = App.from_config(config_path=\"chromadb.yaml\")" + "app = App.from_config(config={\"vectordb\": {\n", + " \"provider\": \"chroma\",\n", + " \"config\": {\n", + " \"collection_name\": \"my-collection\",\n", + " \"host\": \"your-chromadb-url.com\",\n", + " \"port\": 5200,\n", + " \"allow_reset\": 
True\n", + " }\n", + "}})" ] }, { @@ -114,7 +88,7 @@ "id": "XNXv4yZwi7ef" }, "source": [ - "### Step-5: Add data sources to your app" + "### Step-4: Add data sources to your app" ] }, { @@ -134,7 +108,7 @@ "id": "_7W6fDeAjMAP" }, "source": [ - "### Step-6: All set. Now start asking questions related to your data" + "### Step-5: All set. Now start asking questions related to your data" ] }, { diff --git a/notebooks/cohere.ipynb b/notebooks/cohere.ipynb index d7c9e05a..26df9c83 100644 --- a/notebooks/cohere.ipynb +++ b/notebooks/cohere.ipynb @@ -30,7 +30,7 @@ }, "outputs": [], "source": [ - "!pip install embedchain[dataloaders,cohere]" + "!pip install embedchain[cohere]" ] }, { @@ -59,46 +59,13 @@ "os.environ[\"COHERE_API_KEY\"] = \"xxx\"" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ns6RhPfbiitr" - }, - "source": [ - "### Step-3: Define your llm and embedding model config" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "S9CkxVjriotB" - }, - "outputs": [], - "source": [ - "config = \"\"\"\n", - "llm:\n", - " provider: cohere\n", - " config:\n", - " model: gptd-instruct-tft\n", - " temperature: 0.5\n", - " max_tokens: 1000\n", - " top_p: 1\n", - " stream: false\n", - "\"\"\"\n", - "\n", - "# Write the multi-line string to a YAML file\n", - "with open('cohere.yaml', 'w') as file:\n", - " file.write(config)" - ] - }, { "cell_type": "markdown", "metadata": { "id": "PGt6uPLIi1CS" }, "source": [ - "### Step-4 Create embedchain app based on the config" + "### Step-3 Create embedchain app and define your config" ] }, { @@ -114,7 +81,16 @@ }, "outputs": [], "source": [ - "app = App.from_config(config_path=\"cohere.yaml\")" + "app = App.from_config(config={\"llm\": {\n", + " \"provider\": \"cohere\",\n", + " \"config\": {\n", + " \"model\": \"gptd-instruct-tft\",\n", + " \"temperature\": 0.5,\n", + " \"max_tokens\": 1000,\n", + " \"top_p\": 1,\n", + " \"stream\": False\n", + " }\n", + "}})" ] }, { @@ -123,7 +99,7 @@ "id": 
"XNXv4yZwi7ef" }, "source": [ - "### Step-5: Add data sources to your app" + "### Step-4: Add data sources to your app" ] }, { @@ -148,7 +124,7 @@ "id": "_7W6fDeAjMAP" }, "source": [ - "### Step-6: All set. Now start asking questions related to your data" + "### Step-5: All set. Now start asking questions related to your data" ] }, { diff --git a/notebooks/elasticsearch.ipynb b/notebooks/elasticsearch.ipynb index 77d5dab2..c507efd2 100644 --- a/notebooks/elasticsearch.ipynb +++ b/notebooks/elasticsearch.ipynb @@ -26,7 +26,7 @@ }, "outputs": [], "source": [ - "!pip install embedchain[dataloaders,elasticsearch]" + "!pip install embedchain[elasticsearch]" ] }, { @@ -54,45 +54,13 @@ "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ns6RhPfbiitr" - }, - "source": [ - "### Step-3: Define your Vector Database config" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "S9CkxVjriotB" - }, - "outputs": [], - "source": [ - "config = \"\"\"\n", - "vectordb:\n", - " provider: elasticsearch\n", - " config:\n", - " collection_name: 'es-index'\n", - " es_url: your-elasticsearch-url.com\n", - " allow_reset: true\n", - " api_key: xxx\n", - "\"\"\"\n", - "\n", - "# Write the multi-line string to a YAML file\n", - "with open('elasticsearch.yaml', 'w') as file:\n", - " file.write(config)" - ] - }, { "cell_type": "markdown", "metadata": { "id": "PGt6uPLIi1CS" }, "source": [ - "### Step-4 Create embedchain app based on the config" + "### Step-3 Create embedchain app and define your config" ] }, { @@ -103,7 +71,15 @@ }, "outputs": [], "source": [ - "app = App.from_config(config_path=\"elasticsearch.yaml\")" + "app = App.from_config(config={\"vectordb\": {\n", + " \"provider\": \"elasticsearch\",\n", + " \"config\": {\n", + " \"collection_name\": \"es-index\",\n", + " \"es_url\": \"your-elasticsearch-url.com\",\n", + " \"allow_reset\": True,\n", + " \"api_key\": \"xxx\"\n", + " }\n", + "}})" ] }, { @@ -112,7 +88,7 
@@ "id": "XNXv4yZwi7ef" }, "source": [ - "### Step-5: Add data sources to your app" + "### Step-4: Add data sources to your app" ] }, { @@ -132,7 +108,7 @@ "id": "_7W6fDeAjMAP" }, "source": [ - "### Step-6: All set. Now start asking questions related to your data" + "### Step-5: All set. Now start asking questions related to your data" ] }, { diff --git a/notebooks/gpt4all.ipynb b/notebooks/gpt4all.ipynb index 47e855ef..1bad7ebd 100644 --- a/notebooks/gpt4all.ipynb +++ b/notebooks/gpt4all.ipynb @@ -30,7 +30,7 @@ }, "outputs": [], "source": [ - "!pip install embedchain[dataloaders,opensource]" + "!pip install embedchain[opensource]" ] }, { @@ -55,51 +55,13 @@ "from embedchain import App" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ns6RhPfbiitr" - }, - "source": [ - "### Step-3: Define your llm and embedding model config" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "S9CkxVjriotB" - }, - "outputs": [], - "source": [ - "config = \"\"\"\n", - "llm:\n", - " provider: gpt4all\n", - " config:\n", - " model: 'orca-mini-3b-gguf2-q4_0.gguf'\n", - " temperature: 0.5\n", - " max_tokens: 1000\n", - " top_p: 1\n", - " stream: false\n", - "\n", - "embedder:\n", - " provider: gpt4all\n", - " config:\n", - " model: 'all-MiniLM-L6-v2'\n", - "\"\"\"\n", - "\n", - "# Write the multi-line string to a YAML file\n", - "with open('gpt4all.yaml', 'w') as file:\n", - " file.write(config)" - ] - }, { "cell_type": "markdown", "metadata": { "id": "PGt6uPLIi1CS" }, "source": [ - "### Step-4 Create embedchain app based on the config" + "### Step-3 Create embedchain app and define your config" ] }, { @@ -114,7 +76,24 @@ }, "outputs": [], "source": [ - "app = App.from_config(config_path=\"gpt4all.yaml\")" + "app = App.from_config(config={\n", + " \"llm\": {\n", + " \"provider\": \"gpt4all\",\n", + " \"config\": {\n", + " \"model\": \"orca-mini-3b-gguf2-q4_0.gguf\",\n", + " \"temperature\": 0.5,\n", + " \"max_tokens\": 1000,\n", + " 
\"top_p\": 1,\n", + " \"stream\": False\n", + " }\n", + " },\n", + " \"embedder\": {\n", + " \"provider\": \"gpt4all\",\n", + " \"config\": {\n", + " \"model\": \"all-MiniLM-L6-v2\"\n", + " }\n", + " }\n", + "})" ] }, { @@ -123,7 +102,7 @@ "id": "XNXv4yZwi7ef" }, "source": [ - "### Step-5: Add data sources to your app" + "### Step-4: Add data sources to your app" ] }, { @@ -148,7 +127,7 @@ "id": "_7W6fDeAjMAP" }, "source": [ - "### Step-6: All set. Now start asking questions related to your data" + "### Step-5: All set. Now start asking questions related to your data" ] }, { diff --git a/notebooks/hugging_face_hub.ipynb b/notebooks/hugging_face_hub.ipynb index 15819c57..eff2dc93 100644 --- a/notebooks/hugging_face_hub.ipynb +++ b/notebooks/hugging_face_hub.ipynb @@ -31,7 +31,7 @@ }, "outputs": [], "source": [ - "!pip install embedchain[dataloaders,huggingface_hub,opensource]" + "!pip install embedchain[huggingface_hub,opensource]" ] }, { @@ -59,51 +59,13 @@ "os.environ[\"HUGGINGFACE_ACCESS_TOKEN\"] = \"hf_xxx\"" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ns6RhPfbiitr" - }, - "source": [ - "### Step-3: Define your llm and embedding model config" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "S9CkxVjriotB" - }, - "outputs": [], - "source": [ - "config = \"\"\"\n", - "llm:\n", - " provider: huggingface\n", - " config:\n", - " model: 'google/flan-t5-xxl'\n", - " temperature: 0.5\n", - " max_tokens: 1000\n", - " top_p: 0.8\n", - " stream: false\n", - "\n", - "embedder:\n", - " provider: huggingface\n", - " config:\n", - " model: 'sentence-transformers/all-mpnet-base-v2'\n", - "\"\"\"\n", - "\n", - "# Write the multi-line string to a YAML file\n", - "with open('huggingface.yaml', 'w') as file:\n", - " file.write(config)" - ] - }, { "cell_type": "markdown", "metadata": { "id": "PGt6uPLIi1CS" }, "source": [ - "### Step-4 Create embedchain app based on the config" + "### Step-3 Create embedchain app and define your 
config" ] }, { @@ -114,7 +76,24 @@ }, "outputs": [], "source": [ - "app = App.from_config(config_path=\"huggingface.yaml\")" + "app = App.from_config(config={\n", + " \"llm\": {\n", + " \"provider\": \"huggingface\",\n", + " \"config\": {\n", + " \"model\": \"google/flan-t5-xxl\",\n", + " \"temperature\": 0.5,\n", + " \"max_tokens\": 1000,\n", + " \"top_p\": 0.8,\n", + " \"stream\": False\n", + " }\n", + " },\n", + " \"embedder\": {\n", + " \"provider\": \"huggingface\",\n", + " \"config\": {\n", + " \"model\": \"sentence-transformers/all-mpnet-base-v2\"\n", + " }\n", + " }\n", + "})" ] }, { @@ -123,7 +102,7 @@ "id": "XNXv4yZwi7ef" }, "source": [ - "### Step-5: Add data sources to your app" + "### Step-4: Add data sources to your app" ] }, { @@ -148,7 +127,7 @@ "id": "_7W6fDeAjMAP" }, "source": [ - "### Step-6: All set. Now start asking questions related to your data" + "### Step-5: All set. Now start asking questions related to your data" ] }, { diff --git a/notebooks/jina.ipynb b/notebooks/jina.ipynb index a100304f..b89a2aa2 100644 --- a/notebooks/jina.ipynb +++ b/notebooks/jina.ipynb @@ -31,7 +31,7 @@ }, "outputs": [], "source": [ - "!pip install embedchain[dataloaders]" + "!pip install embedchain" ] }, { @@ -60,45 +60,13 @@ "os.environ[\"JINACHAT_API_KEY\"] = \"xxx\"" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ns6RhPfbiitr" - }, - "source": [ - "### Step-3: Define your llm and embedding model config" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "S9CkxVjriotB" - }, - "outputs": [], - "source": [ - "config = \"\"\"\n", - "llm:\n", - " provider: jina\n", - " config:\n", - " temperature: 0.5\n", - " max_tokens: 1000\n", - " top_p: 1\n", - " stream: false\n", - "\"\"\"\n", - "\n", - "# Write the multi-line string to a YAML file\n", - "with open('jina.yaml', 'w') as file:\n", - " file.write(config)" - ] - }, { "cell_type": "markdown", "metadata": { "id": "PGt6uPLIi1CS" }, "source": [ - "### Step-4 Create 
embedchain app based on the config" + "### Step-3 Create embedchain app and define your config" ] }, { @@ -114,7 +82,15 @@ }, "outputs": [], "source": [ - "app = App.from_config(config_path=\"jina.yaml\")" + "app = App.from_config(config={\"llm\": {\n", + " \"provider\": \"jina\",\n", + " \"config\": {\n", + " \"temperature\": 0.5,\n", + " \"max_tokens\": 1000,\n", + " \"top_p\": 1,\n", + " \"stream\": False\n", + " }\n", + "}})" ] }, { @@ -123,7 +99,7 @@ "id": "XNXv4yZwi7ef" }, "source": [ - "### Step-5: Add data sources to your app" + "### Step-4: Add data sources to your app" ] }, { @@ -148,7 +124,7 @@ "id": "_7W6fDeAjMAP" }, "source": [ - "### Step-6: All set. Now start asking questions related to your data" + "### Step-5: All set. Now start asking questions related to your data" ] }, { diff --git a/notebooks/llama2.ipynb b/notebooks/llama2.ipynb index 218c9850..eabf7490 100644 --- a/notebooks/llama2.ipynb +++ b/notebooks/llama2.ipynb @@ -30,7 +30,7 @@ }, "outputs": [], "source": [ - "!pip install embedchain[dataloaders,llama2]" + "!pip install embedchain[llama2]" ] }, { @@ -59,46 +59,13 @@ "os.environ[\"REPLICATE_API_TOKEN\"] = \"xxx\"" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ns6RhPfbiitr" - }, - "source": [ - "### Step-3: Define your llm and embedding model config" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "S9CkxVjriotB" - }, - "outputs": [], - "source": [ - "config = \"\"\"\n", - "llm:\n", - " provider: llama2\n", - " config:\n", - " model: 'a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5'\n", - " temperature: 0.5\n", - " max_tokens: 1000\n", - " top_p: 0.5\n", - " stream: false\n", - "\"\"\"\n", - "\n", - "# Write the multi-line string to a YAML file\n", - "with open('llama2.yaml', 'w') as file:\n", - " file.write(config)" - ] - }, { "cell_type": "markdown", "metadata": { "id": "PGt6uPLIi1CS" }, "source": [ - "### Step-4 Create embedchain app based on the 
config" + "### Step-3 Create embedchain app and define your config" ] }, { @@ -109,7 +76,16 @@ }, "outputs": [], "source": [ - "app = App.from_config(config_path=\"llama2.yaml\")" + "app = App.from_config(config={\"llm\": {\n", + " \"provider\": \"llama2\",\n", + " \"config\": {\n", + " \"model\": \"a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5\",\n", + " \"temperature\": 0.5,\n", + " \"max_tokens\": 1000,\n", + " \"top_p\": 0.5,\n", + " \"stream\": False\n", + " }\n", + "}})" ] }, { @@ -118,7 +94,7 @@ "id": "XNXv4yZwi7ef" }, "source": [ - "### Step-5: Add data sources to your app" + "### Step-4: Add data sources to your app" ] }, { @@ -143,7 +119,7 @@ "id": "_7W6fDeAjMAP" }, "source": [ - "### Step-6: All set. Now start asking questions related to your data" + "### Step-5: All set. Now start asking questions related to your data" ] }, { diff --git a/notebooks/ollama.ipynb b/notebooks/ollama.ipynb index 9e3fafc1..550c2362 100644 --- a/notebooks/ollama.ipynb +++ b/notebooks/ollama.ipynb @@ -24,50 +24,13 @@ "- ollama serve" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ns6RhPfbiitr" - }, - "source": [ - "### Step-2: Define your llm and embedding model config (Going all out local inference, no need for OpenAI API Key)" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "S9CkxVjriotB" - }, - "outputs": [], - "source": [ - "config = \"\"\"\n", - "llm:\n", - " provider: ollama\n", - " config:\n", - " model: 'llama2'\n", - " temperature: 0.5\n", - " top_p: 1\n", - " stream: true\n", - "\n", - "embedder:\n", - " provider: huggingface\n", - " config:\n", - " model: 'BAAI/bge-small-en-v1.5'\n", - "\"\"\"\n", - "\n", - "# Write the multi-line string to a YAML file\n", - "with open('ollama.yaml', 'w') as file:\n", - " file.write(config)" - ] - }, { "cell_type": "markdown", "metadata": { "id": "PGt6uPLIi1CS" }, "source": [ - "### Step-3 Create embedchain app based on the config" + "### Step-2 
Create embedchain app and define your config (all local inference)" ] }, { @@ -93,7 +56,23 @@ ], "source": [ "from embedchain import App\n", - "app = App.from_config(config_path=\"ollama.yaml\")" + "app = App.from_config(config={\n", + " \"llm\": {\n", + " \"provider\": \"ollama\",\n", + " \"config\": {\n", + " \"model\": \"llama2\",\n", + " \"temperature\": 0.5,\n", + " \"top_p\": 1,\n", + " \"stream\": True\n", + " }\n", + " },\n", + " \"embedder\": {\n", + " \"provider\": \"huggingface\",\n", + " \"config\": {\n", + " \"model\": \"BAAI/bge-small-en-v1.5\"\n", + " }\n", + " }\n", + "})" ] }, { @@ -102,7 +81,7 @@ "id": "XNXv4yZwi7ef" }, "source": [ - "### Step-4: Add data sources to your app" + "### Step-3: Add data sources to your app" ] }, { @@ -159,7 +138,7 @@ "id": "_7W6fDeAjMAP" }, "source": [ - "### Step-5: All set. Now start asking questions related to your data" + "### Step-4: All set. Now start asking questions related to your data" ] }, { diff --git a/notebooks/openai.ipynb b/notebooks/openai.ipynb index b488e6ab..408a495a 100644 --- a/notebooks/openai.ipynb +++ b/notebooks/openai.ipynb @@ -31,7 +31,7 @@ }, "outputs": [], "source": [ - "!pip install embedchain[dataloaders]" + "!pip install embedchain" ] }, { @@ -59,51 +59,13 @@ "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ns6RhPfbiitr" - }, - "source": [ - "### Step-3: Define your llm and embedding model config" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "S9CkxVjriotB" - }, - "outputs": [], - "source": [ - "config = \"\"\"\n", - "llm:\n", - " provider: openai\n", - " config:\n", - " model: gpt-3.5-turbo\n", - " temperature: 0.5\n", - " max_tokens: 1000\n", - " top_p: 1\n", - " stream: false\n", - "\n", - "embedder:\n", - " provider: openai\n", - " config:\n", - " model: text-embedding-ada-002\n", - "\"\"\"\n", - "\n", - "# Write the multi-line string to a YAML file\n", - "with open('openai.yaml', 
'w') as file:\n", - " file.write(config)" - ] - }, { "cell_type": "markdown", "metadata": { "id": "PGt6uPLIi1CS" }, "source": [ - "### Step-4 Create embedchain app based on the config" + "### Step-3 Create embedchain app and define your config" ] }, { @@ -114,7 +76,24 @@ }, "outputs": [], "source": [ - "app = App.from_config(config_path=\"openai.yaml\")" + "app = App.from_config(config={\n", + " \"llm\": {\n", + " \"provider\": \"openai\",\n", + " \"config\": {\n", + " \"model\": \"gpt-3.5-turbo\",\n", + " \"temperature\": 0.5,\n", + " \"max_tokens\": 1000,\n", + " \"top_p\": 1,\n", + " \"stream\": False\n", + " }\n", + " },\n", + " \"embedder\": {\n", + " \"provider\": \"openai\",\n", + " \"config\": {\n", + " \"model\": \"text-embedding-ada-002\"\n", + " }\n", + " }\n", + "})" ] }, { @@ -123,7 +102,7 @@ "id": "XNXv4yZwi7ef" }, "source": [ - "### Step-5: Add data sources to your app" + "### Step-4: Add data sources to your app" ] }, { @@ -143,7 +122,7 @@ "id": "_7W6fDeAjMAP" }, "source": [ - "### Step-6: All set. Now start asking questions related to your data" + "### Step-5: All set. 
Now start asking questions related to your data" ] }, { @@ -172,7 +151,8 @@ "name": "python3" }, "language_info": { - "name": "python" + "name": "python", + "version": "3.11.6" } }, "nbformat": 4, diff --git a/notebooks/opensearch.ipynb b/notebooks/opensearch.ipynb index d130df2e..f9e678db 100644 --- a/notebooks/opensearch.ipynb +++ b/notebooks/opensearch.ipynb @@ -26,7 +26,7 @@ }, "outputs": [], "source": [ - "!pip install embedchain[dataloaders,opensearch]" + "!pip install embedchain[opensearch]" ] }, { @@ -54,49 +54,13 @@ "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ns6RhPfbiitr" - }, - "source": [ - "### Step-3: Define your Vector Database config" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "S9CkxVjriotB" - }, - "outputs": [], - "source": [ - "config = \"\"\"\n", - "vectordb:\n", - " provider: opensearch\n", - " config:\n", - " opensearch_url: 'your-opensearch-url.com'\n", - " http_auth:\n", - " - admin\n", - " - admin\n", - " vector_dimension: 1536\n", - " collection_name: 'my-app'\n", - " use_ssl: false\n", - " verify_certs: false\n", - "\"\"\"\n", - "\n", - "# Write the multi-line string to a YAML file\n", - "with open('opensearch.yaml', 'w') as file:\n", - " file.write(config)" - ] - }, { "cell_type": "markdown", "metadata": { "id": "PGt6uPLIi1CS" }, "source": [ - "### Step-4 Create embedchain app based on the config" + "### Step-3 Create embedchain app and define your config" ] }, { @@ -107,7 +71,17 @@ }, "outputs": [], "source": [ - "app = App.from_config(config_path=\"opensearch.yaml\")" + "app = App.from_config(config={\"vectordb\": {\n", + " \"provider\": \"opensearch\",\n", + " \"config\": {\n", + " \"opensearch_url\": \"your-opensearch-url.com\",\n", + " \"http_auth\": [\"admin\", \"admin\"],\n", + " \"vector_dimension\": 1536,\n", + " \"collection_name\": \"my-app\",\n", + " \"use_ssl\": False,\n", + " \"verify_certs\": False\n", + " }\n", + "}})" ] }, { @@ 
-116,7 +90,7 @@ "id": "XNXv4yZwi7ef" }, "source": [ - "### Step-5: Add data sources to your app" + "### Step-4: Add data sources to your app" ] }, { @@ -136,7 +110,7 @@ "id": "_7W6fDeAjMAP" }, "source": [ - "### Step-6: All set. Now start asking questions related to your data" + "### Step-5: All set. Now start asking questions related to your data" ] }, { diff --git a/notebooks/pinecone.ipynb b/notebooks/pinecone.ipynb index 7facae33..c0b90a8b 100644 --- a/notebooks/pinecone.ipynb +++ b/notebooks/pinecone.ipynb @@ -26,7 +26,7 @@ }, "outputs": [], "source": [ - "!pip install embedchain[dataloaders,pinecone]" + "!pip install embedchain[pinecone]" ] }, { @@ -56,44 +56,13 @@ "os.environ[\"PINECONE_ENV\"] = \"xxx\"" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ns6RhPfbiitr" - }, - "source": [ - "### Step-3: Define your Vector Database config" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "S9CkxVjriotB" - }, - "outputs": [], - "source": [ - "config = \"\"\"\n", - "vectordb:\n", - " provider: pinecone\n", - " config:\n", - " metric: cosine\n", - " vector_dimension: 768\n", - " collection_name: pc-index\n", - "\"\"\"\n", - "\n", - "# Write the multi-line string to a YAML file\n", - "with open('pinecone.yaml', 'w') as file:\n", - " file.write(config)" - ] - }, { "cell_type": "markdown", "metadata": { "id": "PGt6uPLIi1CS" }, "source": [ - "### Step-4 Create embedchain app based on the config" + "### Step-3 Create embedchain app and define your config" ] }, { @@ -104,7 +73,14 @@ }, "outputs": [], "source": [ - "app = App.from_config(config_path=\"pinecone.yaml\")" + "app = App.from_config(config={\"vectordb\": {\n", + " \"provider\": \"pinecone\",\n", + " \"config\": {\n", + " \"metric\": \"cosine\",\n", + " \"vector_dimension\": 768,\n", + " \"collection_name\": \"pc-index\"\n", + " }\n", + "}})" ] }, { @@ -113,7 +89,7 @@ "id": "XNXv4yZwi7ef" }, "source": [ - "### Step-5: Add data sources to your app" + "### Step-4: Add data sources 
to your app" ] }, { @@ -133,7 +109,7 @@ "id": "_7W6fDeAjMAP" }, "source": [ - "### Step-6: All set. Now start asking questions related to your data" + "### Step-5: All set. Now start asking questions related to your data" ] }, { diff --git a/notebooks/together.ipynb b/notebooks/together.ipynb index e9d3511e..c2645cde 100644 --- a/notebooks/together.ipynb +++ b/notebooks/together.ipynb @@ -30,7 +30,7 @@ }, "outputs": [], "source": [ - "!pip install embedchain[dataloaders,together]" + "!pip install embedchain[together]" ] }, { @@ -59,44 +59,13 @@ "os.environ[\"TOGETHER_API_KEY\"] = \"\"" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ns6RhPfbiitr" - }, - "source": [ - "### Step-3: Define your llm and embedding model config" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "id": "S9CkxVjriotB" - }, - "outputs": [], - "source": [ - "config = \"\"\"\n", - "llm:\n", - " provider: together\n", - " config:\n", - " model: mistralai/Mixtral-8x7B-Instruct-v0.1\n", - " temperature: 0.5\n", - " max_tokens: 1000\n", - "\"\"\"\n", - "\n", - "# Write the multi-line string to a YAML file\n", - "with open('together.yaml', 'w') as file:\n", - " file.write(config)" - ] - }, { "cell_type": "markdown", "metadata": { "id": "PGt6uPLIi1CS" }, "source": [ - "### Step-4 Create embedchain app based on the config" + "### Step-3 Create embedchain app and define your config" ] }, { @@ -112,7 +81,14 @@ }, "outputs": [], "source": [ - "app = App.from_config(config_path=\"together.yaml\")" + "app = App.from_config(config={\"llm\": {\n", + " \"provider\": \"together\",\n", + " \"config\": {\n", + " \"model\": \"mistralai/Mixtral-8x7B-Instruct-v0.1\",\n", + " \"temperature\": 0.5,\n", + " \"max_tokens\": 1000\n", + " }\n", + "}})" ] }, { @@ -121,7 +97,7 @@ "id": "XNXv4yZwi7ef" }, "source": [ - "### Step-5: Add data sources to your app" + "### Step-4: Add data sources to your app" ] }, { @@ -178,7 +154,7 @@ "id": "_7W6fDeAjMAP" }, "source": [ - "### Step-6: All set. 
Now start asking questions related to your data" + "### Step-5: All set. Now start asking questions related to your data" ] }, { diff --git a/notebooks/vertex_ai.ipynb b/notebooks/vertex_ai.ipynb index 54e2cc51..1cb41a77 100644 --- a/notebooks/vertex_ai.ipynb +++ b/notebooks/vertex_ai.ipynb @@ -30,7 +30,7 @@ }, "outputs": [], "source": [ - "!pip install embedchain[dataloaders,vertexai]" + "!pip install embedchain[vertexai]" ] }, { @@ -58,50 +58,13 @@ "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ns6RhPfbiitr" - }, - "source": [ - "### Step-3: Define your llm and embedding model config" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "S9CkxVjriotB" - }, - "outputs": [], - "source": [ - "config = \"\"\"\n", - "llm:\n", - " provider: vertexai\n", - " config:\n", - " model: 'chat-bison'\n", - " temperature: 0.5\n", - " max_tokens: 1000\n", - " stream: false\n", - "\n", - "embedder:\n", - " provider: vertexai\n", - " config:\n", - " model: 'textembedding-gecko'\n", - "\"\"\"\n", - "\n", - "# Write the multi-line string to a YAML file\n", - "with open('vertexai.yaml', 'w') as file:\n", - " file.write(config)" - ] - }, { "cell_type": "markdown", "metadata": { "id": "PGt6uPLIi1CS" }, "source": [ - "### Step-4 Create embedchain app based on the config" + "### Step-3 Create embedchain app and define your config" ] }, { @@ -117,7 +80,23 @@ }, "outputs": [], "source": [ - "app = App.from_config(config_path=\"vertexai.yaml\")" + "app = App.from_config(config={\n", + " \"llm\": {\n", + " \"provider\": \"vertexai\",\n", + " \"config\": {\n", + " \"model\": \"chat-bison\",\n", + " \"temperature\": 0.5,\n", + " \"max_tokens\": 1000,\n", + " \"stream\": False\n", + " }\n", + " },\n", + " \"embedder\": {\n", + " \"provider\": \"vertexai\",\n", + " \"config\": {\n", + " \"model\": \"textembedding-gecko\"\n", + " }\n", + " }\n", + "})" ] }, { @@ -126,7 +105,7 @@ "id": "XNXv4yZwi7ef" }, 
"source": [ - "### Step-5: Add data sources to your app" + "### Step-4: Add data sources to your app" ] }, { @@ -146,7 +125,7 @@ "id": "_7W6fDeAjMAP" }, "source": [ - "### Step-6: All set. Now start asking questions related to your data" + "### Step-5: All set. Now start asking questions related to your data" ] }, {