diff --git a/notebooks/anthropic.ipynb b/notebooks/anthropic.ipynb new file mode 100644 index 00000000..47ff6cb7 --- /dev/null +++ b/notebooks/anthropic.ipynb @@ -0,0 +1,185 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "b02n_zJ_hl3d" + }, + "source": [ + "## Cookbook for using Anthropic with Embedchain\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gyJ6ui2vhtMY" + }, + "source": [ + "### Step-1: Install embedchain package" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-NbXjAdlh0vJ", + "outputId": "efdce0dc-fb30-4e01-f5a8-ef1a7f4e8c09" + }, + "outputs": [], + "source": [ + "!pip install embedchain" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nGnpSYAAh2bQ" + }, + "source": [ + "### Step-2: Set Anthropic related environment variables\n", + "\n", + "You can find `OPENAI_API_KEY` on your [OpenAI dashboard](https://platform.openai.com/account/api-keys) and `ANTHROPIC_API_KEY` on your [Anthropic dashboard](https://console.anthropic.com/account/keys)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0fBdQ9GAiRvK" + }, + "outputs": [], + "source": [ + "import os\n", + "from embedchain import App\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"\n", + "os.environ[\"ANTHROPIC_API_KEY\"] = \"xxx\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ns6RhPfbiitr" + }, + "source": [ + "### Step-3: Define your llm and embedding model config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "S9CkxVjriotB" + }, + "outputs": [], + "source": [ + "config = \"\"\"\n", + "llm:\n", + " provider: anthropic\n", + " config:\n", + " model: 'claude-instant-1'\n", + " temperature: 0.5\n", + " top_p: 1\n", + " stream: false\n", + "\"\"\"\n", + "\n", + "# Write the multi-line string to a YAML file\n", + "with open('anthropic.yaml', 'w') as file:\n", + " file.write(config)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PGt6uPLIi1CS" + }, + "source": [ + "### Step-4 Create embedchain app based on the config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Amzxk3m-i3tD" + }, + "outputs": [], + "source": [ + "app = App.from_config(yaml_path=\"anthropic.yaml\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XNXv4yZwi7ef" + }, + "source": [ + "### Step-5: Add data sources to your app" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 52 + }, + "id": "Sn_0rx9QjIY9", + "outputId": "dc17baec-39b5-4dc8-bd42-f2aad92697eb" + }, + "outputs": [], + "source": [ + "app.add(\"https://www.forbes.com/profile/elon-musk\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_7W6fDeAjMAP" + }, + "source": [ + "### Step-6: All set. 
Now start asking questions related to your data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 391 + }, + "id": "cvIK7dWRjN_f", + "outputId": "3d1cb7ce-969e-4dad-d48c-b818b7447cc0" + }, + "outputs": [], + "source": [ + "while(True):\n", + " question = input(\"Enter question: \")\n", + " if question in ['q', 'exit', 'quit']:\n", + " break\n", + " answer = app.query(question)\n", + " print(answer)" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/chromadb.ipynb b/notebooks/chromadb.ipynb new file mode 100644 index 00000000..16e189b7 --- /dev/null +++ b/notebooks/chromadb.ipynb @@ -0,0 +1,171 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "## Cookbook for using ChromaDB with Embedchain" + ], + "metadata": { + "id": "b02n_zJ_hl3d" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Step-1: Install embedchain package" + ], + "metadata": { + "id": "gyJ6ui2vhtMY" + } + }, + { + "cell_type": "code", + "source": [ + "!pip install embedchain" + ], + "metadata": { + "id": "-NbXjAdlh0vJ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Step-2: Set OpenAI environment variables\n", + "\n", + "You can find this env variable on your [OpenAI dashboard](https://platform.openai.com/account/api-keys)." 
+ ], + "metadata": { + "id": "nGnpSYAAh2bQ" + } + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "from embedchain import App\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"" + ], + "metadata": { + "id": "0fBdQ9GAiRvK" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Step-3: Define your Vector Database config" + ], + "metadata": { + "id": "Ns6RhPfbiitr" + } + }, + { + "cell_type": "code", + "source": [ + "config = \"\"\"\n", + "vectordb:\n", + " provider: chroma\n", + " config:\n", + " collection_name: 'my-collection'\n", + " # CHANGE THE BELOW TWO LINES!\n", + " # pass remote database variables - host and port\n", + " host: your-chromadb-url.com\n", + " port: 5200\n", + " allow_reset: true\n", + "\"\"\"\n", + "\n", + "# Write the multi-line string to a YAML file\n", + "with open('chromadb.yaml', 'w') as file:\n", + " file.write(config)" + ], + "metadata": { + "id": "S9CkxVjriotB" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Step-4 Create embedchain app based on the config" + ], + "metadata": { + "id": "PGt6uPLIi1CS" + } + }, + { + "cell_type": "code", + "source": [ + "app = App.from_config(yaml_path=\"chromadb.yaml\")" + ], + "metadata": { + "id": "Amzxk3m-i3tD" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Step-5: Add data sources to your app" + ], + "metadata": { + "id": "XNXv4yZwi7ef" + } + }, + { + "cell_type": "code", + "source": [ + "app.add(\"https://www.forbes.com/profile/elon-musk\")" + ], + "metadata": { + "id": "Sn_0rx9QjIY9" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Step-6: All set. 
Now start asking questions related to your data" + ], + "metadata": { + "id": "_7W6fDeAjMAP" + } + }, + { + "cell_type": "code", + "source": [ + "while(True):\n", + " question = input(\"Enter question: \")\n", + " if question in ['q', 'exit', 'quit']:\n", + " break\n", + " answer = app.query(question)\n", + " print(answer)" + ], + "metadata": { + "id": "cvIK7dWRjN_f" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/notebooks/cohere.ipynb b/notebooks/cohere.ipynb new file mode 100644 index 00000000..3831b80f --- /dev/null +++ b/notebooks/cohere.ipynb @@ -0,0 +1,205 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "b02n_zJ_hl3d" + }, + "source": [ + "## Cookbook for using Cohere with Embedchain" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gyJ6ui2vhtMY" + }, + "source": [ + "### Step-1: Install embedchain package" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-NbXjAdlh0vJ", + "outputId": "fae77912-4e6a-4c78-fcb7-fbbe46f7a9c7" + }, + "outputs": [], + "source": [ + "!pip install embedchain" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nGnpSYAAh2bQ" + }, + "source": [ + "### Step-2: Set Cohere related environment variables and install the dependencies\n", + "\n", + "You can find `OPENAI_API_KEY` on your [OpenAI dashboard](https://platform.openai.com/account/api-keys) and `COHERE_API_KEY` key on your [Cohere dashboard](https://dashboard.cohere.com/api-keys)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "S5jTywPZNtrj", + "outputId": "4a23c813-c9e5-4b6c-e3d9-b41e4fdbc54d" + }, + "outputs": [], + "source": [ + "!pip install embedchain[cohere]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0fBdQ9GAiRvK" + }, + "outputs": [], + "source": [ + "import os\n", + "from embedchain import App\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"\n", + "os.environ[\"COHERE_API_KEY\"] = \"xxx\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ns6RhPfbiitr" + }, + "source": [ + "### Step-3: Define your llm and embedding model config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "S9CkxVjriotB" + }, + "outputs": [], + "source": [ + "config = \"\"\"\n", + "llm:\n", + " provider: cohere\n", + " config:\n", + " model: gptd-instruct-tft\n", + " temperature: 0.5\n", + " max_tokens: 1000\n", + " top_p: 1\n", + " stream: false\n", + "\"\"\"\n", + "\n", + "# Write the multi-line string to a YAML file\n", + "with open('cohere.yaml', 'w') as file:\n", + " file.write(config)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PGt6uPLIi1CS" + }, + "source": [ + "### Step-4 Create embedchain app based on the config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 321 + }, + "id": "Amzxk3m-i3tD", + "outputId": "afe8afde-5cb8-46bc-c541-3ad26cc3fa6e" + }, + "outputs": [], + "source": [ + "app = App.from_config(yaml_path=\"cohere.yaml\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XNXv4yZwi7ef" + }, + "source": [ + "### Step-5: Add data sources to your app" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 
176 + }, + "id": "Sn_0rx9QjIY9", + "outputId": "2f2718a4-3b7e-4844-fd46-3e0857653ca0" + }, + "outputs": [], + "source": [ + "app.add(\"https://www.forbes.com/profile/elon-musk\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_7W6fDeAjMAP" + }, + "source": [ + "### Step-6: All set. Now start asking questions related to your data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cvIK7dWRjN_f", + "outputId": "79e873c8-9594-45da-f5a3-0a893511267f" + }, + "outputs": [], + "source": [ + "while(True):\n", + " question = input(\"Enter question: \")\n", + " if question in ['q', 'exit', 'quit']:\n", + " break\n", + " answer = app.query(question)\n", + " print(answer)" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/elasticsearch.ipynb b/notebooks/elasticsearch.ipynb new file mode 100644 index 00000000..4bb69ad1 --- /dev/null +++ b/notebooks/elasticsearch.ipynb @@ -0,0 +1,180 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "## Cookbook for using ElasticSearchDB with Embedchain" + ], + "metadata": { + "id": "b02n_zJ_hl3d" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Step-1: Install embedchain package" + ], + "metadata": { + "id": "gyJ6ui2vhtMY" + } + }, + { + "cell_type": "code", + "source": [ + "!pip install embedchain" + ], + "metadata": { + "id": "-NbXjAdlh0vJ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Step-2: Set OpenAI environment variables and 
install the dependencies.\n", + "\n", + "You can find this env variable on your [OpenAI dashboard](https://platform.openai.com/account/api-keys). Now lets install the dependencies needed for Elasticsearch." + ], + "metadata": { + "id": "nGnpSYAAh2bQ" + } + }, + { + "cell_type": "code", + "source": [ + "!pip install --upgrade 'embedchain[elasticsearch]'" + ], + "metadata": { + "id": "-MUFRfxV7Jk7" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "from embedchain import App\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"" + ], + "metadata": { + "id": "0fBdQ9GAiRvK" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Step-3: Define your Vector Database config" + ], + "metadata": { + "id": "Ns6RhPfbiitr" + } + }, + { + "cell_type": "code", + "source": [ + "config = \"\"\"\n", + "vectordb:\n", + " provider: elasticsearch\n", + " config:\n", + " collection_name: 'es-index'\n", + " es_url: your-elasticsearch-url.com\n", + " allow_reset: true\n", + " api_key: xxx\n", + "\"\"\"\n", + "\n", + "# Write the multi-line string to a YAML file\n", + "with open('elasticsearch.yaml', 'w') as file:\n", + " file.write(config)" + ], + "metadata": { + "id": "S9CkxVjriotB" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Step-4 Create embedchain app based on the config" + ], + "metadata": { + "id": "PGt6uPLIi1CS" + } + }, + { + "cell_type": "code", + "source": [ + "app = App.from_config(yaml_path=\"elasticsearch.yaml\")" + ], + "metadata": { + "id": "Amzxk3m-i3tD" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Step-5: Add data sources to your app" + ], + "metadata": { + "id": "XNXv4yZwi7ef" + } + }, + { + "cell_type": "code", + "source": [ + "app.add(\"https://www.forbes.com/profile/elon-musk\")" + ], + "metadata": { + "id": "Sn_0rx9QjIY9" + }, + 
"execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Step-6: All set. Now start asking questions related to your data" + ], + "metadata": { + "id": "_7W6fDeAjMAP" + } + }, + { + "cell_type": "code", + "source": [ + "while(True):\n", + " question = input(\"Enter question: \")\n", + " if question in ['q', 'exit', 'quit']:\n", + " break\n", + " answer = app.query(question)\n", + " print(answer)" + ], + "metadata": { + "id": "cvIK7dWRjN_f" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/notebooks/gpt4all.ipynb b/notebooks/gpt4all.ipynb new file mode 100644 index 00000000..951bb76a --- /dev/null +++ b/notebooks/gpt4all.ipynb @@ -0,0 +1,205 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "b02n_zJ_hl3d" + }, + "source": [ + "## Cookbook for using GPT4All with Embedchain" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gyJ6ui2vhtMY" + }, + "source": [ + "### Step-1: Install embedchain package" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-NbXjAdlh0vJ", + "outputId": "077fa470-b51f-4c29-8c22-9c5f0a9cef47" + }, + "outputs": [], + "source": [ + "!pip install embedchain" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nGnpSYAAh2bQ" + }, + "source": [ + "### Step-2: Set GPT4ALL related environment variables and install dependencies\n", + "\n", + "GPT4All is free for all and doesn't require any API Key to use it. Just import the dependencies." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dGOE4u3dC6at", + "outputId": "c1c0087b-3f14-49fa-fb86-a4a3391ba14c" + }, + "outputs": [], + "source": [ + "!pip install --upgrade embedchain[opensource]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0fBdQ9GAiRvK" + }, + "outputs": [], + "source": [ + "from embedchain import App" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ns6RhPfbiitr" + }, + "source": [ + "### Step-3: Define your llm and embedding model config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "S9CkxVjriotB" + }, + "outputs": [], + "source": [ + "config = \"\"\"\n", + "llm:\n", + " provider: gpt4all\n", + " config:\n", + " model: 'orca-mini-3b.ggmlv3.q4_0.bin'\n", + " temperature: 0.5\n", + " max_tokens: 1000\n", + " top_p: 1\n", + " stream: false\n", + "\n", + "embedder:\n", + " provider: gpt4all\n", + " config:\n", + " model: 'all-MiniLM-L6-v2'\n", + "\"\"\"\n", + "\n", + "# Write the multi-line string to a YAML file\n", + "with open('gpt4all.yaml', 'w') as file:\n", + " file.write(config)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PGt6uPLIi1CS" + }, + "source": [ + "### Step-4 Create embedchain app based on the config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Amzxk3m-i3tD", + "outputId": "775db99b-e217-47db-f87f-788495d86f26" + }, + "outputs": [], + "source": [ + "app = App.from_config(yaml_path=\"gpt4all.yaml\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XNXv4yZwi7ef" + }, + "source": [ + "### Step-5: Add data sources to your app" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 52 + }, + "id": 
"Sn_0rx9QjIY9", + "outputId": "c6514f17-3cb2-4fbc-c80d-79b3a311ff30" + }, + "outputs": [], + "source": [ + "app.add(\"https://www.forbes.com/profile/elon-musk\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_7W6fDeAjMAP" + }, + "source": [ + "### Step-6: All set. Now start asking questions related to your data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 480 + }, + "id": "cvIK7dWRjN_f", + "outputId": "c74f356a-d2fb-426d-b36c-d84911397338" + }, + "outputs": [], + "source": [ + "while(True):\n", + " question = input(\"Enter question: \")\n", + " if question in ['q', 'exit', 'quit']:\n", + " break\n", + " answer = app.query(question)\n", + " print(answer)" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/hugging_face_hub.ipynb b/notebooks/hugging_face_hub.ipynb new file mode 100644 index 00000000..868d4cdc --- /dev/null +++ b/notebooks/hugging_face_hub.ipynb @@ -0,0 +1,228 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "b02n_zJ_hl3d" + }, + "source": [ + "## Cookbook for using Hugging Face Hub with Embedchain" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gyJ6ui2vhtMY" + }, + "source": [ + "### Step-1: Install embedchain package" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "-NbXjAdlh0vJ", + "outputId": "35ddc904-8067-44cf-dcc9-3c8b4cd29989" + }, + "outputs": [], + "source": [ + "!pip install embedchain" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nGnpSYAAh2bQ" + }, + "source": [ + "### Step-2: Set Hugging Face Hub related environment variables and install 
dependencies\n", + "\n", + "You can find your `HUGGINGFACE_ACCESS_TOKEN` key on your [Hugging Face Hub dashboard](https://huggingface.co/settings/tokens) and install the dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VfDNZJCqNfqo", + "outputId": "34894d35-7142-42ee-8564-2e9f718afcbb" + }, + "outputs": [], + "source": [ + "!pip install embedchain[huggingface-hub]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "SCNT8khqcR3G", + "outputId": "b789ee77-ef50-4330-8ac6-5da645dc36d6" + }, + "outputs": [], + "source": [ + "!pip install embedchain[opensource]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0fBdQ9GAiRvK" + }, + "outputs": [], + "source": [ + "import os\n", + "from embedchain import App\n", + "\n", + "os.environ[\"HUGGINGFACE_ACCESS_TOKEN\"] = \"hf_xxx\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ns6RhPfbiitr" + }, + "source": [ + "### Step-3: Define your llm and embedding model config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "S9CkxVjriotB" + }, + "outputs": [], + "source": [ + "config = \"\"\"\n", + "llm:\n", + " provider: huggingface\n", + " config:\n", + " model: 'google/flan-t5-xxl'\n", + " temperature: 0.5\n", + " max_tokens: 1000\n", + " top_p: 0.8\n", + " stream: false\n", + "\n", + "embedder:\n", + " provider: huggingface\n", + " config:\n", + " model: 'sentence-transformers/all-mpnet-base-v2'\n", + "\"\"\"\n", + "\n", + "# Write the multi-line string to a YAML file\n", + "with open('huggingface.yaml', 'w') as file:\n", + " file.write(config)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PGt6uPLIi1CS" + }, + "source": [ + "### Step-4 Create embedchain app based on the config" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "id": "Amzxk3m-i3tD" + }, + "outputs": [], + "source": [ + "app = App.from_config(yaml_path=\"huggingface.yaml\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XNXv4yZwi7ef" + }, + "source": [ + "### Step-5: Add data sources to your app" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "Sn_0rx9QjIY9", + "outputId": "3c2a803a-3a93-4b0d-a6ae-17ae3c96c3c2" + }, + "outputs": [], + "source": [ + "app.add(\"https://www.forbes.com/profile/elon-musk\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_7W6fDeAjMAP" + }, + "source": [ + "### Step-6: All set. Now start asking questions related to your data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cvIK7dWRjN_f", + "outputId": "47a89d1c-b322-495c-822a-6c2ecef894d2" + }, + "outputs": [], + "source": [ + "while(True):\n", + " question = input(\"Enter question: \")\n", + " if question in ['q', 'exit', 'quit']:\n", + " break\n", + " answer = app.query(question)\n", + " print(answer)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HvZVn6gU5xB_" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/jina.ipynb b/notebooks/jina.ipynb new file mode 100644 index 00000000..801d25b7 --- /dev/null +++ b/notebooks/jina.ipynb @@ -0,0 +1,189 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "b02n_zJ_hl3d" + }, + "source": [ + "## Cookbook for using JinaChat with Embedchain" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gyJ6ui2vhtMY" + }, + 
"source": [ + "### Step-1: Install embedchain package" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "-NbXjAdlh0vJ", + "outputId": "69cb79a6-c758-4656-ccf7-9f3105c81d16" + }, + "outputs": [], + "source": [ + "!pip install embedchain" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nGnpSYAAh2bQ" + }, + "source": [ + "### Step-2: Set JinaChat related environment variables\n", + "\n", + "You can find `OPENAI_API_KEY` on your [OpenAI dashboard](https://platform.openai.com/account/api-keys) and `JINACHAT_API_KEY` key on your [Chat Jina dashboard](https://chat.jina.ai/api)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0fBdQ9GAiRvK" + }, + "outputs": [], + "source": [ + "import os\n", + "from embedchain import App\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"\n", + "os.environ[\"JINACHAT_API_KEY\"] = \"xxx\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ns6RhPfbiitr" + }, + "source": [ + "### Step-3: Define your llm and embedding model config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "S9CkxVjriotB" + }, + "outputs": [], + "source": [ + "config = \"\"\"\n", + "llm:\n", + " provider: jina\n", + " config:\n", + " temperature: 0.5\n", + " max_tokens: 1000\n", + " top_p: 1\n", + " stream: false\n", + "\"\"\"\n", + "\n", + "# Write the multi-line string to a YAML file\n", + "with open('jina.yaml', 'w') as file:\n", + " file.write(config)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PGt6uPLIi1CS" + }, + "source": [ + "### Step-4 Create embedchain app based on the config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 321 + }, + "id": "Amzxk3m-i3tD", + "outputId": "8d00da74-5f73-49bb-b868-dcf1c375ac85" + }, + 
"outputs": [], + "source": [ + "app = App.from_config(yaml_path=\"jina.yaml\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XNXv4yZwi7ef" + }, + "source": [ + "### Step-5: Add data sources to your app" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 52 + }, + "id": "Sn_0rx9QjIY9", + "outputId": "10eeacc7-9263-448e-876d-002af897ebe5" + }, + "outputs": [], + "source": [ + "app.add(\"https://www.forbes.com/profile/elon-musk\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_7W6fDeAjMAP" + }, + "source": [ + "### Step-6: All set. Now start asking questions related to your data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cvIK7dWRjN_f", + "outputId": "7dc7212f-a0e9-43c8-f119-f595ba79b4b7" + }, + "outputs": [], + "source": [ + "while(True):\n", + " question = input(\"Enter question: \")\n", + " if question in ['q', 'exit', 'quit']:\n", + " break\n", + " answer = app.query(question)\n", + " print(answer)" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/llama2.ipynb b/notebooks/llama2.ipynb new file mode 100644 index 00000000..8852d593 --- /dev/null +++ b/notebooks/llama2.ipynb @@ -0,0 +1,196 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "b02n_zJ_hl3d" + }, + "source": [ + "## Cookbook for using LLAMA2 with Embedchain" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gyJ6ui2vhtMY" + }, + "source": [ + "### Step-1: Install embedchain package" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": 
"-NbXjAdlh0vJ", + "outputId": "86a4a9b2-4ed6-431c-da6f-c3eacb390f42" + }, + "outputs": [], + "source": [ + "!pip install embedchain" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nGnpSYAAh2bQ" + }, + "source": [ + "### Step-2: Set LLAMA2 related environment variables and install dependencies\n", + "\n", + "You can find `OPENAI_API_KEY` on your [OpenAI dashboard](https://platform.openai.com/account/api-keys) and `REPLICATE_API_TOKEN` key on your [Replicate dashboard](https://replicate.com/account/api-tokens). Now lets install the dependencies for LLAMA2." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qoBUbocNtUUD" + }, + "outputs": [], + "source": [ + "!pip install embedchain[llama2]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0fBdQ9GAiRvK" + }, + "outputs": [], + "source": [ + "import os\n", + "from embedchain import App\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"\n", + "os.environ[\"REPLICATE_API_TOKEN\"] = \"xxx\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ns6RhPfbiitr" + }, + "source": [ + "### Step-3: Define your llm and embedding model config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "S9CkxVjriotB" + }, + "outputs": [], + "source": [ + "config = \"\"\"\n", + "llm:\n", + " provider: llama2\n", + " config:\n", + " model: 'a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5'\n", + " temperature: 0.5\n", + " max_tokens: 1000\n", + " top_p: 0.5\n", + " stream: false\n", + "\"\"\"\n", + "\n", + "# Write the multi-line string to a YAML file\n", + "with open('llama2.yaml', 'w') as file:\n", + " file.write(config)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PGt6uPLIi1CS" + }, + "source": [ + "### Step-4 Create embedchain app based on the config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": 
{ + "id": "Amzxk3m-i3tD" + }, + "outputs": [], + "source": [ + "app = App.from_config(yaml_path=\"llama2.yaml\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XNXv4yZwi7ef" + }, + "source": [ + "### Step-5: Add data sources to your app" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 52 + }, + "id": "Sn_0rx9QjIY9", + "outputId": "ba158e9c-0f16-4c6b-a876-7543120985a2" + }, + "outputs": [], + "source": [ + "app.add(\"https://www.forbes.com/profile/elon-musk\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_7W6fDeAjMAP" + }, + "source": [ + "### Step-6: All set. Now start asking questions related to your data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 599 + }, + "id": "cvIK7dWRjN_f", + "outputId": "e2d11a25-a2ed-4034-ec6a-e8a5986c89ae" + }, + "outputs": [], + "source": [ + "while(True):\n", + " question = input(\"Enter question: \")\n", + " if question in ['q', 'exit', 'quit']:\n", + " break\n", + " answer = app.query(question)\n", + " print(answer)" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/openai.ipynb b/notebooks/openai.ipynb new file mode 100644 index 00000000..acfbfdcc --- /dev/null +++ b/notebooks/openai.ipynb @@ -0,0 +1,181 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "b02n_zJ_hl3d" + }, + "source": [ + "## Cookbook for using OpenAI with Embedchain" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gyJ6ui2vhtMY" + }, + "source": [ + "### Step-1: Install embedchain package" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + 
"base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "-NbXjAdlh0vJ", + "outputId": "6c630676-c7fc-4054-dc94-c613de58a037" + }, + "outputs": [], + "source": [ + "!pip install embedchain" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nGnpSYAAh2bQ" + }, + "source": [ + "### Step-2: Set OpenAI environment variables\n", + "\n", + "You can find this env variable on your [OpenAI dashboard](https://platform.openai.com/account/api-keys)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0fBdQ9GAiRvK" + }, + "outputs": [], + "source": [ + "import os\n", + "from embedchain import App\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ns6RhPfbiitr" + }, + "source": [ + "### Step-3: Define your llm and embedding model config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "S9CkxVjriotB" + }, + "outputs": [], + "source": [ + "config = \"\"\"\n", + "llm:\n", + " provider: openai\n", + " config:\n", + " model: gpt-35-turbo\n", + " temperature: 0.5\n", + " max_tokens: 1000\n", + " top_p: 1\n", + " stream: false\n", + "\n", + "embedder:\n", + " provider: openai\n", + " config:\n", + " model: text-embedding-ada-002\n", + " deployment_name: ec_embeddings_ada_002\n", + "\"\"\"\n", + "\n", + "# Write the multi-line string to a YAML file\n", + "with open('openai.yaml', 'w') as file:\n", + " file.write(config)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PGt6uPLIi1CS" + }, + "source": [ + "### Step-4 Create embedchain app based on the config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Amzxk3m-i3tD" + }, + "outputs": [], + "source": [ + "app = App.from_config(yaml_path=\"openai.yaml\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XNXv4yZwi7ef" + }, + "source": [ + "### Step-5: Add data sources to your app" + ] + }, + { 
+ "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Sn_0rx9QjIY9" + }, + "outputs": [], + "source": [ + "app.add(\"https://www.forbes.com/profile/elon-musk\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_7W6fDeAjMAP" + }, + "source": [ + "### Step-6: All set. Now start asking questions related to your data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cvIK7dWRjN_f" + }, + "outputs": [], + "source": [ + "while(True):\n", + " question = input(\"Enter question: \")\n", + " if question in ['q', 'exit', 'quit']:\n", + " break\n", + " answer = app.query(question)\n", + " print(answer)" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/opensearch.ipynb b/notebooks/opensearch.ipynb new file mode 100644 index 00000000..16a6b1e6 --- /dev/null +++ b/notebooks/opensearch.ipynb @@ -0,0 +1,184 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "## Cookbook for using OpenSearchDB with Embedchain" + ], + "metadata": { + "id": "b02n_zJ_hl3d" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Step-1: Install embedchain package" + ], + "metadata": { + "id": "gyJ6ui2vhtMY" + } + }, + { + "cell_type": "code", + "source": [ + "!pip install embedchain" + ], + "metadata": { + "id": "-NbXjAdlh0vJ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Step-2: Set OpenAI environment variables and install the dependencies.\n", + "\n", + "You can find this env variable on your [OpenAI 
dashboard](https://platform.openai.com/account/api-keys). Now let's install the dependencies needed for OpenSearch." + ], + "metadata": { + "id": "nGnpSYAAh2bQ" + } + }, + { + "cell_type": "code", + "source": [ + "!pip install --upgrade 'embedchain[opensearch]'" + ], + "metadata": { + "id": "-MUFRfxV7Jk7" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "from embedchain import App\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"" + ], + "metadata": { + "id": "0fBdQ9GAiRvK" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Step-3: Define your Vector Database config" + ], + "metadata": { + "id": "Ns6RhPfbiitr" + } + }, + { + "cell_type": "code", + "source": [ + "config = \"\"\"\n", + "vectordb:\n", + " provider: opensearch\n", + " config:\n", + " opensearch_url: 'your-opensearch-url.com'\n", + " http_auth:\n", + " - admin\n", + " - admin\n", + " vector_dimension: 1536\n", + " collection_name: 'my-app'\n", + " use_ssl: false\n", + " verify_certs: false\n", + "\"\"\"\n", + "\n", + "# Write the multi-line string to a YAML file\n", + "with open('opensearch.yaml', 'w') as file:\n", + " file.write(config)" + ], + "metadata": { + "id": "S9CkxVjriotB" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Step-4: Create embedchain app based on the config" + ], + "metadata": { + "id": "PGt6uPLIi1CS" + } + }, + { + "cell_type": "code", + "source": [ + "app = App.from_config(yaml_path=\"opensearch.yaml\")" + ], + "metadata": { + "id": "Amzxk3m-i3tD" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Step-5: Add data sources to your app" + ], + "metadata": { + "id": "XNXv4yZwi7ef" + } + }, + { + "cell_type": "code", + "source": [ + "app.add(\"https://www.forbes.com/profile/elon-musk\")" + ], + "metadata": { + "id": "Sn_0rx9QjIY9" + }, +
"execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Step-6: All set. Now start asking questions related to your data" + ], + "metadata": { + "id": "_7W6fDeAjMAP" + } + }, + { + "cell_type": "code", + "source": [ + "while(True):\n", + " question = input(\"Enter question: \")\n", + " if question in ['q', 'exit', 'quit']:\n", + " break\n", + " answer = app.query(question)\n", + " print(answer)" + ], + "metadata": { + "id": "cvIK7dWRjN_f" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/notebooks/pinecone.ipynb b/notebooks/pinecone.ipynb new file mode 100644 index 00000000..5f6b018d --- /dev/null +++ b/notebooks/pinecone.ipynb @@ -0,0 +1,181 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "b02n_zJ_hl3d" + }, + "source": [ + "## Cookbook for using PineconeDB with Embedchain" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gyJ6ui2vhtMY" + }, + "source": [ + "### Step-1: Install embedchain package" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-NbXjAdlh0vJ" + }, + "outputs": [], + "source": [ + "!pip install embedchain" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nGnpSYAAh2bQ" + }, + "source": [ + "### Step-2: Set environment variables needed for Pinecone and install the dependencies.\n", + "\n", + "You can find these env variables on your [OpenAI dashboard](https://platform.openai.com/account/api-keys) and [Pinecone dashboard](https://app.pinecone.io/). Now let's install the dependencies needed for Pinecone."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-MUFRfxV7Jk7" + }, + "outputs": [], + "source": [ + "!pip install --upgrade 'embedchain[pinecone]'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0fBdQ9GAiRvK" + }, + "outputs": [], + "source": [ + "import os\n", + "from embedchain import App\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"\n", + "os.environ[\"PINECONE_API_KEY\"] = \"xxx\"\n", + "os.environ[\"PINECONE_ENV\"] = \"xxx\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ns6RhPfbiitr" + }, + "source": [ + "### Step-3: Define your Vector Database config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "S9CkxVjriotB" + }, + "outputs": [], + "source": [ + "config = \"\"\"\n", + "vectordb:\n", + " provider: pinecone\n", + " config:\n", + " metric: cosine\n", + " vector_dimension: 768\n", + " collection_name: pc-index\n", + "\"\"\"\n", + "\n", + "# Write the multi-line string to a YAML file\n", + "with open('pinecone.yaml', 'w') as file:\n", + " file.write(config)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PGt6uPLIi1CS" + }, + "source": [ + "### Step-4: Create embedchain app based on the config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Amzxk3m-i3tD" + }, + "outputs": [], + "source": [ + "app = App.from_config(yaml_path=\"pinecone.yaml\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XNXv4yZwi7ef" + }, + "source": [ + "### Step-5: Add data sources to your app" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Sn_0rx9QjIY9" + }, + "outputs": [], + "source": [ + "app.add(\"https://www.forbes.com/profile/elon-musk\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_7W6fDeAjMAP" + }, + "source": [ + "### Step-6: All set. 
Now start asking questions related to your data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cvIK7dWRjN_f" + }, + "outputs": [], + "source": [ + "while(True):\n", + " question = input(\"Enter question: \")\n", + " if question in ['q', 'exit', 'quit']:\n", + " break\n", + " answer = app.query(question)\n", + " print(answer)" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/vertex_ai.ipynb b/notebooks/vertex_ai.ipynb new file mode 100644 index 00000000..574f65ea --- /dev/null +++ b/notebooks/vertex_ai.ipynb @@ -0,0 +1,194 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "b02n_zJ_hl3d" + }, + "source": [ + "## Cookbook for using VertexAI with Embedchain" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gyJ6ui2vhtMY" + }, + "source": [ + "### Step-1: Install embedchain package" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-NbXjAdlh0vJ", + "outputId": "eb9be5b6-dc81-43d2-d515-df8f0116be11" + }, + "outputs": [], + "source": [ + "!pip install embedchain" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nGnpSYAAh2bQ" + }, + "source": [ + "### Step-2: Set VertexAI related environment variables and install dependencies.\n", + "\n", + "You can find `OPENAI_API_KEY` on your [OpenAI dashboard](https://platform.openai.com/account/api-keys). Now let's install the dependencies."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "a_shbIFBtnwu" + }, + "outputs": [], + "source": [ + "!pip install embedchain[vertexai]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0fBdQ9GAiRvK" + }, + "outputs": [], + "source": [ + "import os\n", + "from embedchain import App\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ns6RhPfbiitr" + }, + "source": [ + "### Step-3: Define your llm and embedding model config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "S9CkxVjriotB" + }, + "outputs": [], + "source": [ + "config = \"\"\"\n", + "llm:\n", + " provider: vertexai\n", + " config:\n", + " model: 'chat-bison'\n", + " temperature: 0.5\n", + " max_tokens: 1000\n", + " stream: false\n", + "\n", + "embedder:\n", + " provider: vertexai\n", + " config:\n", + " model: 'textembedding-gecko'\n", + "\"\"\"\n", + "\n", + "# Write the multi-line string to a YAML file\n", + "with open('vertexai.yaml', 'w') as file:\n", + " file.write(config)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PGt6uPLIi1CS" + }, + "source": [ + "### Step-4: Create embedchain app based on the config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 582 + }, + "id": "Amzxk3m-i3tD", + "outputId": "5084b6ea-ec20-4281-9f36-e21e93c17475" + }, + "outputs": [], + "source": [ + "app = App.from_config(yaml_path=\"vertexai.yaml\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XNXv4yZwi7ef" + }, + "source": [ + "### Step-5: Add data sources to your app" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Sn_0rx9QjIY9" + }, + "outputs": [], + "source": [ + "app.add(\"https://www.forbes.com/profile/elon-musk\")" + ] + }, + { + "cell_type": 
"markdown", + "metadata": { + "id": "_7W6fDeAjMAP" + }, + "source": [ + "### Step-6: All set. Now start asking questions related to your data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cvIK7dWRjN_f" + }, + "outputs": [], + "source": [ + "while(True):\n", + " question = input(\"Enter question: \")\n", + " if question in ['q', 'exit', 'quit']:\n", + " break\n", + " answer = app.query(question)\n", + " print(answer)" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}