Provide openai-key support from config (#1052)

2023-12-23 14:42:18 +05:30
parent e90673ae5b
commit 11f0d719f5
9 changed files with 31 additions and 43 deletions
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -1,33 +0,0 @@
-{
-  "name": "Python 3",
-  // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
-  "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
-  "customizations": {
-    "codespaces": {
-      "openFiles": [
-        "README.md",
-        "examples/chat-pdf/app.py"
-      ]
-    },
-    "vscode": {
-      "settings": {},
-      "extensions": [
-        "ms-python.python",
-        "ms-python.vscode-pylance"
-      ]
-    }
-  },
-  "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; [ -f requirements.txt ] && pip3 install --user -r requirements.txt; pip3 install --user streamlit; echo '✅ Packages installed and Requirements met'",
-  "postAttachCommand": {
-    "server": "streamlit run examples/chat-pdf/app.py --server.enableCORS false --server.enableXsrfProtection false"
-  },
-  "portsAttributes": {
-    "8501": {
-      "label": "Application",
-      "onAutoForward": "openPreview"
-    }
-  },
-  "forwardPorts": [
-    8501
-  ]
-}
--- a/docs/api-reference/advanced/configuration.mdx
+++ b/docs/api-reference/advanced/configuration.mdx
@@ -25,6 +25,7 @@ llm:
    max_tokens: 1000
    top_p: 1
    stream: false
+    api_key: sk-xxx
    template: |
      Use the following pieces of context to answer the query at the end.
      If you don't know the answer, just say that you don't know, don't try to make up an answer.
@@ -48,6 +49,7 @@ embedder:
  provider: openai
  config:
    model: 'text-embedding-ada-002'
+    api_key: sk-xxx

 chunker:
  chunk_size: 2000
@@ -72,7 +74,8 @@ chunker:
      "top_p": 1,
      "stream": false,
      "template": "Use the following pieces of context to answer the query at the end.\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\n$context\n\nQuery: $query\n\nHelpful Answer:",
-      "system_prompt": "Act as William Shakespeare. Answer the following questions in the style of William Shakespeare."
+      "system_prompt": "Act as William Shakespeare. Answer the following questions in the style of William Shakespeare.",
+      "api_key": "sk-xxx"
    }
  },
  "vectordb": {
@@ -86,7 +89,8 @@ chunker:
  "embedder": {
    "provider": "openai",
    "config": {
-      "model": "text-embedding-ada-002"
+      "model": "text-embedding-ada-002",
+      "api_key": "sk-xxx"
    }
  },
  "chunker": {
@@ -120,7 +124,8 @@ config = {
            ),
            'system_prompt': (
                "Act as William Shakespeare. Answer the following questions in the style of William Shakespeare."
-            )
+            ),
+            'api_key': 'sk-xxx'
        }
    },
    'vectordb': {
@@ -134,7 +139,8 @@ config = {
    'embedder': {
        'provider': 'openai',
        'config': {
-            'model': 'text-embedding-ada-002'
+            'model': 'text-embedding-ada-002',
+            'api_key': 'sk-xxx'
        }
    },
    'chunker': {
@@ -168,6 +174,7 @@ Alright, let's dive into what each key means in the yaml config above:
        - `system_prompt` (String): A system prompt for the model to follow when generating responses, in this case, it's set to the style of William Shakespeare.
        -  `stream` (Boolean): Controls if the response is streamed back to the user (set to false).
        - `number_documents` (Integer): Number of documents to pull from the vectordb as context, defaults to 1
+        - `api_key` (String): The API key for the language model. For the OpenAI provider, the `OPENAI_API_KEY` environment variable is used when this is omitted.
 3. `vectordb` Section:
    - `provider` (String): The provider for the vector database, set to 'chroma'. You can find the full list of vector database providers in [our docs](/components/vector-databases).
    - `config`:
@@ -179,6 +186,7 @@ Alright, let's dive into what each key means in the yaml config above:
    - `provider` (String): The provider for the embedder, set to 'openai'. You can find the full list of embedding model providers in [our docs](/components/embedding-models).
    - `config`:
        - `model` (String): The specific model used for text embedding, 'text-embedding-ada-002'.
+        - `api_key` (String): The API key for the embedding model. For the OpenAI provider, the `OPENAI_API_KEY` environment variable is used when this is omitted.
 5. `chunker` Section:
    - `chunk_size` (Integer): The size of each chunk of text that is sent to the language model.
    - `chunk_overlap` (Integer): The amount of overlap between each chunk of text.
--- a/embedchain/config/embedder/base.py
+++ b/embedchain/config/embedder/base.py
@@ -5,7 +5,9 @@ from embedchain.helpers.json_serializable import register_deserializable

@register_deserializable
 class BaseEmbedderConfig:
-    def __init__(self, model: Optional[str] = None, deployment_name: Optional[str] = None):
+    def __init__(
+        self, model: Optional[str] = None, deployment_name: Optional[str] = None, api_key: Optional[str] = None
+    ):
        """
        Initialize a new instance of an embedder config class.

@@ -16,3 +18,4 @@ class BaseEmbedderConfig:
        """
        self.model = model
        self.deployment_name = deployment_name
+        self.api_key = api_key
--- a/embedchain/config/llm/base.py
+++ b/embedchain/config/llm/base.py
@@ -69,6 +69,7 @@ class BaseLlmConfig(BaseConfig):
        where: Dict[str, Any] = None,
        query_type: Optional[str] = None,
        callbacks: Optional[List] = None,
+        api_key: Optional[str] = None,
    ):
        """
        Initializes a configuration class instance for the LLM.
@@ -117,6 +118,7 @@ class BaseLlmConfig(BaseConfig):
        self.system_prompt = system_prompt
        self.query_type = query_type
        self.callbacks = callbacks
+        self.api_key = api_key

        if type(template) is str:
            template = Template(template)
--- a/embedchain/embedder/openai.py
+++ b/embedchain/embedder/openai.py
@@ -16,16 +16,18 @@ class OpenAIEmbedder(BaseEmbedder):
        if self.config.model is None:
            self.config.model = "text-embedding-ada-002"

+        api_key = self.config.api_key or os.environ["OPENAI_API_KEY"]
+
        if self.config.deployment_name:
            embeddings = AzureOpenAIEmbeddings(deployment=self.config.deployment_name)
            embedding_fn = BaseEmbedder._langchain_default_concept(embeddings)
        else:
-            if os.getenv("OPENAI_API_KEY") is None and os.getenv("OPENAI_ORGANIZATION") is None:
+            if api_key is None and os.getenv("OPENAI_ORGANIZATION") is None:
                raise ValueError(
                    "OPENAI_API_KEY or OPENAI_ORGANIZATION environment variables not provided"
                )  # noqa:E501
            embedding_fn = OpenAIEmbeddingFunction(
-                api_key=os.getenv("OPENAI_API_KEY"),
+                api_key=api_key,
                organization_id=os.getenv("OPENAI_ORGANIZATION"),
                model_name=self.config.model,
            )
--- a/embedchain/llm/openai.py
+++ b/embedchain/llm/openai.py
@@ -1,4 +1,5 @@
 import json
+import os
 from typing import Any, Dict, Optional

 from langchain.chat_models import ChatOpenAI
@@ -30,6 +31,7 @@ class OpenAILlm(BaseLlm):
            "max_tokens": config.max_tokens,
            "model_kwargs": {},
        }
+        api_key = config.api_key or os.environ["OPENAI_API_KEY"]
        if config.top_p:
            kwargs["model_kwargs"]["top_p"] = config.top_p
        if config.stream:
@@ -37,9 +39,9 @@ class OpenAILlm(BaseLlm):
                StreamingStdOutCallbackHandler

            callbacks = config.callbacks if config.callbacks else [StreamingStdOutCallbackHandler()]
-            chat = ChatOpenAI(**kwargs, streaming=config.stream, callbacks=callbacks)
+            chat = ChatOpenAI(**kwargs, streaming=config.stream, callbacks=callbacks, api_key=api_key)
        else:
-            chat = ChatOpenAI(**kwargs)
+            chat = ChatOpenAI(**kwargs, api_key=api_key)
        if self.functions is not None:
            from langchain.chains.openai_functions import \
                create_openai_fn_runnable
--- a/embedchain/utils.py
+++ b/embedchain/utils.py
@@ -403,6 +403,7 @@ def validate_config(config_data):
                    Optional("deployment_name"): str,
                    Optional("where"): dict,
                    Optional("query_type"): str,
+                    Optional("api_key"): str,
                },
            },
            Optional("vectordb"): {
@@ -416,6 +417,7 @@ def validate_config(config_data):
                Optional("config"): {
                    Optional("model"): Optional(str),
                    Optional("deployment_name"): Optional(str),
+                    Optional("api_key"): str,
                },
            },
            Optional("embedding_model"): {
@@ -423,6 +425,7 @@ def validate_config(config_data):
                Optional("config"): {
                    Optional("model"): str,
                    Optional("deployment_name"): str,
+                    Optional("api_key"): str,
                },
            },
            Optional("chunker"): {
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "embedchain"
-version = "0.1.41"
+version = "0.1.42"
 description = "Data platform for LLMs - Load, index, retrieve and sync any unstructured data"
 authors = [
    "Taranjeet Singh <taranjeet@embedchain.ai>",
--- a/tests/llm/test_openai.py
+++ b/tests/llm/test_openai.py
@@ -72,4 +72,5 @@ def test_get_llm_model_answer_without_system_prompt(config, mocker):
        temperature=config.temperature,
        max_tokens=config.max_tokens,
        model_kwargs={"top_p": config.top_p},
+        api_key=os.environ["OPENAI_API_KEY"],
    )