diff --git a/.gitignore b/.gitignore
index 47eec270..04de5b05 100644
--- a/.gitignore
+++ b/.gitignore
@@ -76,7 +76,7 @@ docs/_build/
 target/
 
 # Jupyter Notebook
-.ipynb_checkpoints
+*.yaml
 
 # IPython
 profile_default/
@@ -171,3 +171,6 @@ db
 .idea/
 
 .DS_Store
+
+notebooks/*.yaml
+.ipynb_checkpoints/
\ No newline at end of file
diff --git a/configs/azure_openai.yaml b/configs/azure_openai.yaml
new file mode 100644
index 00000000..ef77925e
--- /dev/null
+++ b/configs/azure_openai.yaml
@@ -0,0 +1,19 @@
+app:
+  config:
+    id: azure-openai-app
+
+llm:
+  provider: azure_openai
+  model: gpt-35-turbo
+  config:
+    deployment_name: your_llm_deployment_name
+    temperature: 0.5
+    max_tokens: 1000
+    top_p: 1
+    stream: false
+
+embedder:
+  provider: azure_openai
+  config:
+    model: text-embedding-ada-002
+    deployment_name: your_embedding_model_deployment_name
diff --git a/docs/components/embedding-models.mdx b/docs/components/embedding-models.mdx
index 592bae7a..9a7d7c1b 100644
--- a/docs/components/embedding-models.mdx
+++ b/docs/components/embedding-models.mdx
@@ -8,6 +8,7 @@ Embedchain supports several embedding models from the following providers:
+
@@ -43,6 +44,45 @@
 
+## Azure OpenAI
+
+To use the Azure OpenAI embedding model, set the Azure OpenAI related environment variables as shown in the code block below:
+
+
+
+```python main.py
+import os
+from embedchain import App
+
+os.environ["OPENAI_API_TYPE"] = "azure"
+os.environ["OPENAI_API_BASE"] = "https://xxx.openai.azure.com/"
+os.environ["OPENAI_API_KEY"] = "xxx"
+os.environ["OPENAI_API_VERSION"] = "xxx"
+
+app = App.from_config(yaml_path="config.yaml")
+```
+
+```yaml config.yaml
+llm:
+  provider: azure_openai
+  model: gpt-35-turbo
+  config:
+    deployment_name: your_llm_deployment_name
+    temperature: 0.5
+    max_tokens: 1000
+    top_p: 1
+    stream: false
+
+embedder:
+  provider: azure_openai
+  config:
+    model: text-embedding-ada-002
+    deployment_name: your_embedding_model_deployment_name
+```
+
+
+You can find the list of models and deployment names on the [Azure OpenAI Platform](https://oai.azure.com/portal).
+
 ## GPT4ALL
 
 GPT4All supports generating high quality embeddings of arbitrary length documents of text using a CPU optimized contrastively trained Sentence Transformer.
diff --git a/docs/components/llms.mdx b/docs/components/llms.mdx
index 8c644d6c..f42b11c9 100644
--- a/docs/components/llms.mdx
+++ b/docs/components/llms.mdx
@@ -65,7 +65,42 @@ llm:
 
 ## Azure OpenAI
 
-_Coming soon_
+To use an Azure OpenAI model, set the Azure OpenAI related environment variables as shown in the code block below:
+
+
+
+```python main.py
+import os
+from embedchain import App
+
+os.environ["OPENAI_API_TYPE"] = "azure"
+os.environ["OPENAI_API_BASE"] = "https://xxx.openai.azure.com/"
+os.environ["OPENAI_API_KEY"] = "xxx"
+os.environ["OPENAI_API_VERSION"] = "xxx"
+
+app = App.from_config(yaml_path="config.yaml")
+```
+
+```yaml config.yaml
+llm:
+  provider: azure_openai
+  model: gpt-35-turbo
+  config:
+    deployment_name: your_llm_deployment_name
+    temperature: 0.5
+    max_tokens: 1000
+    top_p: 1
+    stream: false
+
+embedder:
+  provider: azure_openai
+  config:
+    model: text-embedding-ada-002
+    deployment_name: your_embedding_model_deployment_name
+```
+
+
+You can find the list of models and deployment names on the [Azure OpenAI Platform](https://oai.azure.com/portal).
 
 ## Anthropic
diff --git a/docs/components/vector-databases.mdx b/docs/components/vector-databases.mdx
index 09c53a53..2b30140d 100644
--- a/docs/components/vector-databases.mdx
+++ b/docs/components/vector-databases.mdx
@@ -119,11 +119,17 @@ Install related dependencies using the following command:
 pip install --upgrade 'embedchain[milvus]'
 ```
 
+Set the Zilliz environment variables `ZILLIZ_CLOUD_URI` and `ZILLIZ_CLOUD_TOKEN`, which you can find on their [cloud platform](https://cloud.zilliz.com/).
+
 ```python main.py
+import os
 from embedchain import App
 
+os.environ['ZILLIZ_CLOUD_URI'] = 'https://xxx.zillizcloud.com'
+os.environ['ZILLIZ_CLOUD_TOKEN'] = 'xxx'
+
 # load zilliz configuration from yaml file
 app = App.from_config(yaml_path="config.yaml")
 ```
@@ -147,8 +153,16 @@ _Coming soon_
 
 ## Pinecone
 
+Install Pinecone related dependencies using the following command:
+
+```bash
+pip install --upgrade 'embedchain[pinecone]'
+```
+
 In order to use Pinecone as vector database, set the environment variables `PINECONE_API_KEY` and `PINECONE_ENV` which you can find on [Pinecone dashboard](https://app.pinecone.io/).
+
+
 ```python main.py
 from embedchain import App
 
@@ -165,6 +179,8 @@ vectordb:
   collection_name: my-pinecone-index
 ```
+
+
 ## Qdrant
 
 _Coming soon_
diff --git a/embedchain/factory.py b/embedchain/factory.py
index b3bbf0cc..e1ebcf37 100644
--- a/embedchain/factory.py
+++ b/embedchain/factory.py
@@ -44,10 +44,12 @@ class EmbedderFactory:
         "gpt4all": "embedchain.embedder.gpt4all.GPT4AllEmbedder",
         "huggingface": "embedchain.embedder.huggingface.HuggingFaceEmbedder",
         "vertexai": "embedchain.embedder.vertexai.VertexAIEmbedder",
+        "azure_openai": "embedchain.embedder.openai.OpenAIEmbedder",
         "openai": "embedchain.embedder.openai.OpenAIEmbedder",
     }
     provider_to_config_class = {
         "openai": "embedchain.config.embedder.base.BaseEmbedderConfig",
+        "azure_openai": "embedchain.config.embedder.base.BaseEmbedderConfig",
     }
 
     @classmethod
diff --git a/notebooks/azure-openai.ipynb b/notebooks/azure-openai.ipynb
new file mode 100644
index 00000000..85f6a6ed
--- /dev/null
+++ b/notebooks/azure-openai.ipynb
@@ -0,0 +1,182 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "63ab5e89",
+   "metadata": {},
+   "source": [
+    "## Cookbook for using Azure OpenAI with Embedchain"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e32a0265",
+   "metadata": {},
+   "source": [
+    "### Step-1: Install embedchain package"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b80ff15a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install embedchain"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ac982a56",
+   "metadata": {},
+   "source": [
+    "### Step-2: Set Azure OpenAI related environment variables\n",
+    "\n",
+    "You can find these env variables on your Azure OpenAI dashboard."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e0a36133",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from embedchain import App\n",
+    "\n",
+    "os.environ[\"OPENAI_API_TYPE\"] = \"azure\"\n",
+    "os.environ[\"OPENAI_API_BASE\"] = \"https://xxx.openai.azure.com/\"\n",
+    "os.environ[\"OPENAI_API_KEY\"] = \"xxx\"\n",
+    "os.environ[\"OPENAI_API_VERSION\"] = \"xxx\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7d7b554e",
+   "metadata": {},
+   "source": [
+    "### Step-3: Define your llm and embedding model config"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b9f52fc5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "config = \"\"\"\n",
+    "llm:\n",
+    "  provider: azure_openai\n",
+    "  model: gpt-35-turbo\n",
+    "  config:\n",
+    "    deployment_name: ec_openai_azure\n",
+    "    temperature: 0.5\n",
+    "    max_tokens: 1000\n",
+    "    top_p: 1\n",
+    "    stream: false\n",
+    "\n",
+    "embedder:\n",
+    "  provider: azure_openai\n",
+    "  config:\n",
+    "    model: text-embedding-ada-002\n",
+    "    deployment_name: ec_embeddings_ada_002\n",
+    "\"\"\"\n",
+    "\n",
+    "# Write the multi-line string to a YAML file\n",
+    "with open('azure_openai.yaml', 'w') as file:\n",
+    "    file.write(config)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "98a11130",
+   "metadata": {},
+   "source": [
+    "### Step-4: Create embedchain app based on the config"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1ee9bdd9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "app = App.from_config(yaml_path=\"azure_openai.yaml\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "554dc97b",
+   "metadata": {},
+   "source": [
+    "### Step-5: Add data sources to your app"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "686ae765",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "app.add(\"https://www.forbes.com/profile/elon-musk\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ccc7d421",
+   "metadata": {},
+   "source": [
+    "### Step-6: All set. Now start asking questions related to your data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "27868a7d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "while True:\n",
+    "    question = input(\"Enter question: \")\n",
+    "    if question in ['q', 'exit', 'quit']:\n",
+    "        break\n",
+    "    answer = app.query(question)\n",
+    "    print(answer)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e1f2ead5",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
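
For quick verification outside the notebook, here is a minimal sketch (not part of the patch) of exercising the new `azure_openai` provider via the `configs/azure_openai.yaml` file added in this diff. The endpoint, API key, API version, deployment names, and the sample question are placeholders or assumptions; replace them with values from your own Azure OpenAI resource.

```python
# Sketch: run embedchain against the azure_openai provider registered in embedchain/factory.py.
# Assumes embedchain is installed and configs/azure_openai.yaml has your deployment names filled in.
import os

from embedchain import App

os.environ["OPENAI_API_TYPE"] = "azure"
os.environ["OPENAI_API_BASE"] = "https://xxx.openai.azure.com/"  # placeholder: your resource endpoint
os.environ["OPENAI_API_KEY"] = "xxx"                             # placeholder: your API key
os.environ["OPENAI_API_VERSION"] = "xxx"                         # placeholder: an API version your resource supports

# Both the LLM and the embedder in this config resolve to the azure_openai provider.
app = App.from_config(yaml_path="configs/azure_openai.yaml")

app.add("https://www.forbes.com/profile/elon-musk")
print(app.query("What is the net worth of Elon Musk?"))  # example question, adjust to your data
```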