Provide openai-key support from config (#1052)

This commit is contained in:
Sidharth Mohanty
2023-12-23 14:42:18 +05:30
committed by GitHub
parent e90673ae5b
commit 11f0d719f5
9 changed files with 31 additions and 43 deletions

View File

@@ -1,33 +0,0 @@
{
"name": "Python 3",
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
"image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
"customizations": {
"codespaces": {
"openFiles": [
"README.md",
"examples/chat-pdf/app.py"
]
},
"vscode": {
"settings": {},
"extensions": [
"ms-python.python",
"ms-python.vscode-pylance"
]
}
},
"updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; [ -f requirements.txt ] && pip3 install --user -r requirements.txt; pip3 install --user streamlit; echo '✅ Packages installed and Requirements met'",
"postAttachCommand": {
"server": "streamlit run examples/chat-pdf/app.py --server.enableCORS false --server.enableXsrfProtection false"
},
"portsAttributes": {
"8501": {
"label": "Application",
"onAutoForward": "openPreview"
}
},
"forwardPorts": [
8501
]
}

View File

@@ -25,6 +25,7 @@ llm:
max_tokens: 1000
top_p: 1
stream: false
api_key: sk-xxx
template: |
Use the following pieces of context to answer the query at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
@@ -48,6 +49,7 @@ embedder:
provider: openai
config:
model: 'text-embedding-ada-002'
api_key: sk-xxx
chunker:
chunk_size: 2000
@@ -72,7 +74,8 @@ chunker:
"top_p": 1,
"stream": false,
"template": "Use the following pieces of context to answer the query at the end.\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\n$context\n\nQuery: $query\n\nHelpful Answer:",
"system_prompt": "Act as William Shakespeare. Answer the following questions in the style of William Shakespeare."
"system_prompt": "Act as William Shakespeare. Answer the following questions in the style of William Shakespeare.",
"api_key": "sk-xxx"
}
},
"vectordb": {
@@ -86,7 +89,8 @@ chunker:
"embedder": {
"provider": "openai",
"config": {
"model": "text-embedding-ada-002"
"model": "text-embedding-ada-002",
"api_key": "sk-xxx"
}
},
"chunker": {
@@ -120,7 +124,8 @@ config = {
),
'system_prompt': (
"Act as William Shakespeare. Answer the following questions in the style of William Shakespeare."
)
),
'api_key': 'sk-xxx'
}
},
'vectordb': {
@@ -134,7 +139,8 @@ config = {
'embedder': {
'provider': 'openai',
'config': {
'model': 'text-embedding-ada-002'
'model': 'text-embedding-ada-002',
'api_key': 'sk-xxx'
}
},
'chunker': {
@@ -168,6 +174,7 @@ Alright, let's dive into what each key means in the yaml config above:
- `system_prompt` (String): A system prompt for the model to follow when generating responses; in this example, it instructs the model to answer in the style of William Shakespeare.
- `stream` (Boolean): Controls if the response is streamed back to the user (set to false).
- `number_documents` (Integer): Number of documents to pull from the vector database as context. Defaults to 1.
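- `api_key` (String): The API key for the language model. For the OpenAI provider, the `OPENAI_API_KEY` environment variable is used when this key is not set.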
3. `vectordb` Section:
- `provider` (String): The provider for the vector database, set to 'chroma'. You can find the full list of vector database providers in [our docs](/components/vector-databases).
- `config`:
@@ -179,6 +186,7 @@ Alright, let's dive into what each key means in the yaml config above:
- `provider` (String): The provider for the embedder, set to 'openai'. You can find the full list of embedding model providers in [our docs](/components/embedding-models).
- `config`:
- `model` (String): The specific model used for text embedding, 'text-embedding-ada-002'.
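- `api_key` (String): The API key for the embedding model. For the OpenAI provider, the `OPENAI_API_KEY` environment variable is used when this key is not set.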
5. `chunker` Section:
- `chunk_size` (Integer): The size of each chunk of text that is sent to the language model.
- `chunk_overlap` (Integer): The amount of overlap between each chunk of text.

View File

@@ -5,7 +5,9 @@ from embedchain.helpers.json_serializable import register_deserializable
@register_deserializable
class BaseEmbedderConfig:
def __init__(self, model: Optional[str] = None, deployment_name: Optional[str] = None):
def __init__(
self, model: Optional[str] = None, deployment_name: Optional[str] = None, api_key: Optional[str] = None
):
"""
Initialize a new instance of an embedder config class.
@@ -16,3 +18,4 @@ class BaseEmbedderConfig:
"""
self.model = model
self.deployment_name = deployment_name
self.api_key = api_key

View File

@@ -69,6 +69,7 @@ class BaseLlmConfig(BaseConfig):
where: Dict[str, Any] = None,
query_type: Optional[str] = None,
callbacks: Optional[List] = None,
api_key: Optional[str] = None,
):
"""
Initializes a configuration class instance for the LLM.
@@ -117,6 +118,7 @@ class BaseLlmConfig(BaseConfig):
self.system_prompt = system_prompt
self.query_type = query_type
self.callbacks = callbacks
self.api_key = api_key
if type(template) is str:
template = Template(template)

View File

@@ -16,16 +16,18 @@ class OpenAIEmbedder(BaseEmbedder):
if self.config.model is None:
self.config.model = "text-embedding-ada-002"
api_key = self.config.api_key or os.environ["OPENAI_API_KEY"]
if self.config.deployment_name:
embeddings = AzureOpenAIEmbeddings(deployment=self.config.deployment_name)
embedding_fn = BaseEmbedder._langchain_default_concept(embeddings)
else:
if os.getenv("OPENAI_API_KEY") is None and os.getenv("OPENAI_ORGANIZATION") is None:
if api_key is None and os.getenv("OPENAI_ORGANIZATION") is None:
raise ValueError(
"OPENAI_API_KEY or OPENAI_ORGANIZATION environment variables not provided"
) # noqa:E501
embedding_fn = OpenAIEmbeddingFunction(
api_key=os.getenv("OPENAI_API_KEY"),
api_key=api_key,
organization_id=os.getenv("OPENAI_ORGANIZATION"),
model_name=self.config.model,
)

View File

@@ -1,4 +1,5 @@
import json
import os
from typing import Any, Dict, Optional
from langchain.chat_models import ChatOpenAI
@@ -30,6 +31,7 @@ class OpenAILlm(BaseLlm):
"max_tokens": config.max_tokens,
"model_kwargs": {},
}
api_key = config.api_key or os.environ["OPENAI_API_KEY"]
if config.top_p:
kwargs["model_kwargs"]["top_p"] = config.top_p
if config.stream:
@@ -37,9 +39,9 @@ class OpenAILlm(BaseLlm):
StreamingStdOutCallbackHandler
callbacks = config.callbacks if config.callbacks else [StreamingStdOutCallbackHandler()]
chat = ChatOpenAI(**kwargs, streaming=config.stream, callbacks=callbacks)
chat = ChatOpenAI(**kwargs, streaming=config.stream, callbacks=callbacks, api_key=api_key)
else:
chat = ChatOpenAI(**kwargs)
chat = ChatOpenAI(**kwargs, api_key=api_key)
if self.functions is not None:
from langchain.chains.openai_functions import \
create_openai_fn_runnable

View File

@@ -403,6 +403,7 @@ def validate_config(config_data):
Optional("deployment_name"): str,
Optional("where"): dict,
Optional("query_type"): str,
Optional("api_key"): str,
},
},
Optional("vectordb"): {
@@ -416,6 +417,7 @@ def validate_config(config_data):
Optional("config"): {
Optional("model"): Optional(str),
Optional("deployment_name"): Optional(str),
Optional("api_key"): str,
},
},
Optional("embedding_model"): {
@@ -423,6 +425,7 @@ def validate_config(config_data):
Optional("config"): {
Optional("model"): str,
Optional("deployment_name"): str,
Optional("api_key"): str,
},
},
Optional("chunker"): {

View File

@@ -1,6 +1,6 @@
[tool.poetry]
name = "embedchain"
version = "0.1.41"
version = "0.1.42"
description = "Data platform for LLMs - Load, index, retrieve and sync any unstructured data"
authors = [
"Taranjeet Singh <taranjeet@embedchain.ai>",

View File

@@ -72,4 +72,5 @@ def test_get_llm_model_answer_without_system_prompt(config, mocker):
temperature=config.temperature,
max_tokens=config.max_tokens,
model_kwargs={"top_p": config.top_p},
api_key=os.environ["OPENAI_API_KEY"],
)