[Docs] Add docs for Azure OpenAI provider (#804)

2023-10-16 13:31:56 -07:00
parent 636bc0a99d
commit adf50f1e81
7 changed files with 299 additions and 2 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -76,7 +76,7 @@ docs/_build/
 target/

 # Jupyter Notebook
-.ipynb_checkpoints
+*.yaml

 # IPython
 profile_default/
@@ -171,3 +171,6 @@ db
 .idea/

 .DS_Store
+
+notebooks/*.yaml
+.ipynb_checkpoints/
--- a/configs/azure_openai.yaml
+++ b/configs/azure_openai.yaml
@@ -0,0 +1,19 @@
+app:
+  config:
+    id: azure-openai-app
+
+llm:
+  provider: azure_openai
+  model: gpt-35-turbo
+  config:
+    deployment_name: your_llm_deployment_name
+    temperature: 0.5
+    max_tokens: 1000
+    top_p: 1
+    stream: false
+
+embedder:
+  provider: azure_openai
+  config:
+    model: text-embedding-ada-002
+    deployment_name: your_embedding_model_deployment_name
--- a/docs/components/embedding-models.mdx
+++ b/docs/components/embedding-models.mdx
@@ -8,6 +8,7 @@ Embedchain supports several embedding models from the following providers:

 <CardGroup cols={4}>
  <Card title="OpenAI" href="#openai"></Card>
+  <Card title="Azure OpenAI" href="#azure-openai"></Card>
  <Card title="GPT4All" href="#gpt4all"></Card>
  <Card title="Hugging Face" href="#hugging-face"></Card>
  <Card title="Vertex AI" href="#vertex-ai"></Card>
@@ -43,6 +44,45 @@ embedder:

 </CodeGroup>

+## Azure OpenAI
+
+To use the Azure OpenAI embedding model, you have to set the Azure OpenAI related environment variables as shown in the code block below:
+
+<CodeGroup>
+
+```python main.py
+import os
+from embedchain import App
+
+os.environ["OPENAI_API_TYPE"] = "azure"
+os.environ["OPENAI_API_BASE"] = "https://xxx.openai.azure.com/"
+os.environ["OPENAI_API_KEY"] = "xxx"
+os.environ["OPENAI_API_VERSION"] = "xxx"
+
+app = App.from_config(yaml_path="config.yaml")
+```
+
+```yaml config.yaml
+llm:
+  provider: azure_openai
+  model: gpt-35-turbo
+  config:
+    deployment_name: your_llm_deployment_name
+    temperature: 0.5
+    max_tokens: 1000
+    top_p: 1
+    stream: false
+
+embedder:
+  provider: azure_openai
+  config:
+    model: text-embedding-ada-002
+    deployment_name: your_embedding_model_deployment_name
+```
+</CodeGroup>
+
+You can find the list of models and deployment names on the [Azure OpenAI Platform](https://oai.azure.com/portal).
+
 ## GPT4ALL

 GPT4All supports generating high quality embeddings of arbitrary length documents of text using a CPU optimized contrastively trained Sentence Transformer.
--- a/docs/components/llms.mdx
+++ b/docs/components/llms.mdx
@@ -65,7 +65,42 @@ llm:

 ## Azure OpenAI

-_Coming soon_
+To use the Azure OpenAI model, you have to set the Azure OpenAI related environment variables as shown in the code block below:
+
+<CodeGroup>
+
+```python main.py
+import os
+from embedchain import App
+
+os.environ["OPENAI_API_TYPE"] = "azure"
+os.environ["OPENAI_API_BASE"] = "https://xxx.openai.azure.com/"
+os.environ["OPENAI_API_KEY"] = "xxx"
+os.environ["OPENAI_API_VERSION"] = "xxx"
+
+app = App.from_config(yaml_path="config.yaml")
+```
+
+```yaml config.yaml
+llm:
+  provider: azure_openai
+  model: gpt-35-turbo
+  config:
+    deployment_name: your_llm_deployment_name
+    temperature: 0.5
+    max_tokens: 1000
+    top_p: 1
+    stream: false
+
+embedder:
+  provider: azure_openai
+  config:
+    model: text-embedding-ada-002
+    deployment_name: your_embedding_model_deployment_name
+```
+</CodeGroup>
+
+You can find the list of models and deployment names on the [Azure OpenAI Platform](https://oai.azure.com/portal).

 ## Anthropic

--- a/docs/components/vector-databases.mdx
+++ b/docs/components/vector-databases.mdx
@@ -119,11 +119,17 @@ Install related dependencies using the following command:
 pip install --upgrade 'embedchain[milvus]'
 ```

+Set the Zilliz environment variables `ZILLIZ_CLOUD_URI` and `ZILLIZ_CLOUD_TOKEN`, which you can find on their [cloud platform](https://cloud.zilliz.com/).
+
 <CodeGroup>

 ```python main.py
+import os
 from embedchain import App

+os.environ['ZILLIZ_CLOUD_URI'] = 'https://xxx.zillizcloud.com'
+os.environ['ZILLIZ_CLOUD_TOKEN'] = 'xxx'
+
 # load zilliz configuration from yaml file
 app = App.from_config(yaml_path="config.yaml")
 ```
@@ -147,8 +153,16 @@ _Coming soon_

 ## Pinecone

+Install pinecone related dependencies using the following command:
+
+```bash
+pip install --upgrade 'embedchain[pinecone]'
+```
+
 In order to use Pinecone as vector database, set the environment variables `PINECONE_API_KEY` and `PINECONE_ENV` which you can find on [Pinecone dashboard](https://app.pinecone.io/).

+<CodeGroup>
+
 ```python main.py
 from embedchain import App

@@ -165,6 +179,8 @@ vectordb:
    collection_name: my-pinecone-index
 ```

+</CodeGroup>
+
 ## Qdrant

 _Coming soon_
--- a/embedchain/factory.py
+++ b/embedchain/factory.py
@@ -44,10 +44,12 @@ class EmbedderFactory:
        "gpt4all": "embedchain.embedder.gpt4all.GPT4AllEmbedder",
        "huggingface": "embedchain.embedder.huggingface.HuggingFaceEmbedder",
        "vertexai": "embedchain.embedder.vertexai.VertexAIEmbedder",
+        "azure_openai": "embedchain.embedder.openai.OpenAIEmbedder",
        "openai": "embedchain.embedder.openai.OpenAIEmbedder",
    }
    provider_to_config_class = {
        "openai": "embedchain.config.embedder.base.BaseEmbedderConfig",
+        "azure_openai": "embedchain.config.embedder.base.BaseEmbedderConfig",
    }

    @classmethod
--- a/notebooks/azure-openai.ipynb
+++ b/notebooks/azure-openai.ipynb
@@ -0,0 +1,182 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "63ab5e89",
+   "metadata": {},
+   "source": [
+    "## Cookbook for using Azure OpenAI with Embedchain"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e32a0265",
+   "metadata": {},
+   "source": [
+    "### Step-1: Install embedchain package"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b80ff15a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install embedchain"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ac982a56",
+   "metadata": {},
+   "source": [
+    "### Step-2: Set Azure OpenAI related environment variables\n",
+    "\n",
+    "You can find these env variables on your Azure OpenAI dashboard."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e0a36133",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from embedchain import App\n",
+    "\n",
+    "os.environ[\"OPENAI_API_TYPE\"] = \"azure\"\n",
+    "os.environ[\"OPENAI_API_BASE\"] = \"https://xxx.openai.azure.com/\"\n",
+    "os.environ[\"OPENAI_API_KEY\"] = \"xxx\"\n",
+    "os.environ[\"OPENAI_API_VERSION\"] = \"xxx\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7d7b554e",
+   "metadata": {},
+   "source": [
+    "### Step-3: Define your llm and embedding model config"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b9f52fc5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "config = \"\"\"\n",
+    "llm:\n",
+    "  provider: azure_openai\n",
+    "  model: gpt-35-turbo\n",
+    "  config:\n",
+    "    deployment_name: ec_openai_azure\n",
+    "    temperature: 0.5\n",
+    "    max_tokens: 1000\n",
+    "    top_p: 1\n",
+    "    stream: false\n",
+    "\n",
+    "embedder:\n",
+    "  provider: azure_openai\n",
+    "  config:\n",
+    "    model: text-embedding-ada-002\n",
+    "    deployment_name: ec_embeddings_ada_002\n",
+    "\"\"\"\n",
+    "\n",
+    "# Write the multi-line string to a YAML file\n",
+    "with open('azure_openai.yaml', 'w') as file:\n",
+    "    file.write(config)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "98a11130",
+   "metadata": {},
+   "source": [
+    "### Step-4: Create embedchain app based on the config"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1ee9bdd9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "app = App.from_config(yaml_path=\"azure_openai.yaml\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "554dc97b",
+   "metadata": {},
+   "source": [
+    "### Step-5: Add data sources to your app"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "686ae765",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "app.add(\"https://www.forbes.com/profile/elon-musk\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ccc7d421",
+   "metadata": {},
+   "source": [
+    "### Step-6: All set. Now start asking questions related to your data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "27868a7d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "while(True):\n",
+    "    question = input(\"Enter question: \")\n",
+    "    if question in ['q', 'exit', 'quit']:\n",
+    "        break\n",
+    "    answer = app.query(question)\n",
+    "    print(answer)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e1f2ead5",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}