feat: openai default model uses gpt-4o-mini (#1526)

This commit is contained in:
Kirk Lin
2024-09-09 15:28:28 +08:00
committed by GitHub
parent bf0cf2d9c4
commit 7170edd13f
18 changed files with 88 additions and 67 deletions

View File

@@ -5,7 +5,7 @@ app:
llm:
provider: openai
config:
model: 'gpt-3.5-turbo'
model: 'gpt-4o-mini'
temperature: 0.5
max_tokens: 1000
top_p: 1

View File

@@ -10,7 +10,7 @@ chunker:
llm:
provider: openai
config:
model: 'gpt-3.5-turbo'
model: 'gpt-4o-mini'
temperature: 0.5
max_tokens: 1000
top_p: 1

View File

@@ -8,7 +8,7 @@ app:
llm:
provider: openai
config:
model: 'gpt-3.5-turbo'
model: 'gpt-4o-mini'
temperature: 0.5
max_tokens: 1000
top_p: 1

View File

@@ -20,7 +20,7 @@ app:
llm:
provider: openai
config:
model: 'gpt-3.5-turbo'
model: 'gpt-4o-mini'
temperature: 0.5
max_tokens: 1000
top_p: 1
@@ -82,7 +82,7 @@ cache:
"llm": {
"provider": "openai",
"config": {
"model": "gpt-3.5-turbo",
"model": "gpt-4o-mini",
"temperature": 0.5,
"max_tokens": 1000,
"top_p": 1,
@@ -140,7 +140,7 @@ config = {
'llm': {
'provider': 'openai',
'config': {
'model': 'gpt-3.5-turbo',
'model': 'gpt-4o-mini',
'temperature': 0.5,
'max_tokens': 1000,
'top_p': 1,
@@ -206,7 +206,7 @@ Alright, let's dive into what each key means in the yaml config above:
2. `llm` Section:
- `provider` (String): The provider for the language model, which is set to 'openai'. You can find the full list of LLM providers in [our docs](/components/llms).
- `config`:
- `model` (String): The specific model being used, 'gpt-3.5-turbo'.
- `model` (String): The specific model being used, 'gpt-4o-mini'.
- `temperature` (Float): Controls the randomness of the model's output. A higher value (closer to 1) makes the output more random.
- `max_tokens` (Integer): The maximum number of tokens the model may generate in the response.
- `top_p` (Float): Controls the diversity of word selection. A higher value (closer to 1) makes word selection more diverse.

View File

@@ -62,7 +62,7 @@ app = App.from_config(config_path="config.yaml")
llm:
provider: openai
config:
model: 'gpt-3.5-turbo'
model: 'gpt-4o-mini'
temperature: 0.5
max_tokens: 1000
top_p: 1
@@ -205,7 +205,7 @@ app = App.from_config(config_path="config.yaml")
llm:
provider: azure_openai
config:
model: gpt-3.5-turbo
model: gpt-4o-mini
deployment_name: your_llm_deployment_name
temperature: 0.5
max_tokens: 1000
@@ -887,7 +887,7 @@ response = app.chat("Which companies did Elon Musk found?")
llm:
provider: openai
config:
model: gpt-3.5-turbo
model: gpt-4o-mini
temperature: 0.5
max_tokens: 1000
token_usage: true

View File

@@ -32,7 +32,7 @@ app:
llm:
provider: openai
config:
model: "gpt-3.5-turbo"
model: "gpt-4o-mini"
temperature: 0.5
max_tokens: 1000
top_p: 1

View File

@@ -122,7 +122,7 @@ You can achieve this by setting `stream` to `true` in the config file.
llm:
provider: openai
config:
model: 'gpt-3.5-turbo'
model: 'gpt-4o-mini'
temperature: 0.5
max_tokens: 1000
top_p: 1

View File

@@ -1,6 +1,6 @@
{
"openai/gpt-4": {
"max_tokens": 4096,
"max_tokens": 4096,
"max_input_tokens": 8192,
"max_output_tokens": 4096,
"input_cost_per_token": 0.00003,
@@ -13,6 +13,20 @@
"input_cost_per_token": 0.000005,
"output_cost_per_token": 0.000015
},
"openai/gpt-4o-mini": {
"max_tokens": 4096,
"max_input_tokens": 128000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.00000015,
"output_cost_per_token": 0.00000060
},
"openai/gpt-4o-mini-2024-07-18": {
"max_tokens": 4096,
"max_input_tokens": 128000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.00000015,
"output_cost_per_token": 0.00000060
},
"openai/gpt-4o-2024-05-13": {
"max_tokens": 4096,
"max_input_tokens": 128000,
@@ -153,7 +167,7 @@
"openai/text-embedding-ada-002": {
"max_tokens": 8191,
"max_input_tokens": 8191,
"output_vector_size": 1536,
"output_vector_size": 1536,
"input_cost_per_token": 0.0000001,
"output_cost_per_token": 0.000000
},
@@ -176,7 +190,7 @@
"max_output_tokens": 4096,
"input_cost_per_token": 0.000002,
"output_cost_per_token": 0.000002
},
},
"openai/gpt-3.5-turbo-instruct": {
"max_tokens": 4096,
"max_input_tokens": 8192,
@@ -197,6 +211,13 @@
"max_output_tokens": 4096,
"input_cost_per_token": 0.000005,
"output_cost_per_token": 0.000015
},
"azure/gpt-4o-mini": {
"max_tokens": 4096,
"max_input_tokens": 128000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.00000015,
"output_cost_per_token": 0.00000060
},
"azure/gpt-4-turbo-2024-04-09": {
"max_tokens": 4096,
@@ -325,7 +346,7 @@
"max_input_tokens": 8191,
"input_cost_per_token": 0.00000002,
"output_cost_per_token": 0.000000
},
},
"mistralai/mistral-tiny": {
"max_tokens": 8191,
"max_input_tokens": 32000,
@@ -595,77 +616,77 @@
"max_tokens": 8192,
"max_input_tokens": 32760,
"max_output_tokens": 8192,
"input_cost_per_token": 0.00000025,
"input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.0000005
},
"vertexai/gemini-1.0-pro": {
"vertexai/gemini-1.0-pro": {
"max_tokens": 8192,
"max_input_tokens": 32760,
"max_output_tokens": 8192,
"input_cost_per_token": 0.00000025,
"input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.0000005
},
"vertexai/gemini-1.0-pro-001": {
"vertexai/gemini-1.0-pro-001": {
"max_tokens": 8192,
"max_input_tokens": 32760,
"max_output_tokens": 8192,
"input_cost_per_token": 0.00000025,
"input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.0000005
},
"vertexai/gemini-1.0-pro-002": {
"vertexai/gemini-1.0-pro-002": {
"max_tokens": 8192,
"max_input_tokens": 32760,
"max_output_tokens": 8192,
"input_cost_per_token": 0.00000025,
"input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.0000005
},
"vertexai/gemini-1.5-pro": {
"vertexai/gemini-1.5-pro": {
"max_tokens": 8192,
"max_input_tokens": 1000000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000000625,
"input_cost_per_token": 0.000000625,
"output_cost_per_token": 0.000001875
},
"vertexai/gemini-1.5-flash-001": {
"max_tokens": 8192,
"max_input_tokens": 1000000,
"max_output_tokens": 8192,
"input_cost_per_token": 0,
"input_cost_per_token": 0,
"output_cost_per_token": 0
},
"vertexai/gemini-1.5-flash-preview-0514": {
"max_tokens": 8192,
"max_input_tokens": 1000000,
"max_output_tokens": 8192,
"input_cost_per_token": 0,
"input_cost_per_token": 0,
"output_cost_per_token": 0
},
"vertexai/gemini-1.5-pro-001": {
"vertexai/gemini-1.5-pro-001": {
"max_tokens": 8192,
"max_input_tokens": 1000000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000000625,
"input_cost_per_token": 0.000000625,
"output_cost_per_token": 0.000001875
},
"vertexai/gemini-1.5-pro-preview-0514": {
"vertexai/gemini-1.5-pro-preview-0514": {
"max_tokens": 8192,
"max_input_tokens": 1000000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000000625,
"input_cost_per_token": 0.000000625,
"output_cost_per_token": 0.000001875
},
"vertexai/gemini-1.5-pro-preview-0215": {
"vertexai/gemini-1.5-pro-preview-0215": {
"max_tokens": 8192,
"max_input_tokens": 1000000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000000625,
"input_cost_per_token": 0.000000625,
"output_cost_per_token": 0.000001875
},
"vertexai/gemini-1.5-pro-preview-0409": {
"max_tokens": 8192,
"max_input_tokens": 1000000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000000625,
"input_cost_per_token": 0.000000625,
"output_cost_per_token": 0.000001875
},
"vertexai/gemini-experimental": {
@@ -682,7 +703,7 @@
"max_images_per_prompt": 16,
"max_videos_per_prompt": 1,
"max_video_length": 2,
"input_cost_per_token": 0.00000025,
"input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.0000005
},
"vertexai/gemini-1.0-pro-vision": {
@@ -692,7 +713,7 @@
"max_images_per_prompt": 16,
"max_videos_per_prompt": 1,
"max_video_length": 2,
"input_cost_per_token": 0.00000025,
"input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.0000005
},
"vertexai/gemini-1.0-pro-vision-001": {
@@ -702,7 +723,7 @@
"max_images_per_prompt": 16,
"max_videos_per_prompt": 1,
"max_video_length": 2,
"input_cost_per_token": 0.00000025,
"input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.0000005
},
"vertexai/claude-3-sonnet@20240229": {
@@ -713,7 +734,7 @@
"output_cost_per_token": 0.000015
},
"vertexai/claude-3-haiku@20240307": {
"max_tokens": 4096,
"max_tokens": 4096,
"max_input_tokens": 200000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.00000025,
@@ -727,49 +748,49 @@
"output_cost_per_token": 0.000075
},
"cohere/command-r": {
"max_tokens": 4096,
"max_tokens": 4096,
"max_input_tokens": 128000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.00000050,
"output_cost_per_token": 0.0000015
},
"cohere/command-light": {
"max_tokens": 4096,
"max_tokens": 4096,
"max_input_tokens": 4096,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000015,
"output_cost_per_token": 0.000015
},
"cohere/command-r-plus": {
"max_tokens": 4096,
"max_tokens": 4096,
"max_input_tokens": 128000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000015
},
"cohere/command-nightly": {
"max_tokens": 4096,
"max_tokens": 4096,
"max_input_tokens": 4096,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000015,
"output_cost_per_token": 0.000015
},
"cohere/command": {
"max_tokens": 4096,
"max_tokens": 4096,
"max_input_tokens": 4096,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000015,
"output_cost_per_token": 0.000015
},
"cohere/command-medium-beta": {
"max_tokens": 4096,
"max_tokens": 4096,
"max_input_tokens": 4096,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000015,
"output_cost_per_token": 0.000015
},
"cohere/command-xlarge-beta": {
"max_tokens": 4096,
"max_tokens": 4096,
"max_input_tokens": 4096,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000015,

View File

@@ -26,7 +26,7 @@ class AzureOpenAILlm(BaseLlm):
chat = AzureChatOpenAI(
deployment_name=config.deployment_name,
openai_api_version=str(config.api_version) if config.api_version else "2024-02-01",
model_name=config.model or "gpt-3.5-turbo",
model_name=config.model or "gpt-4o-mini",
temperature=config.temperature,
max_tokens=config.max_tokens,
streaming=config.stream,

View File

@@ -52,7 +52,7 @@ class OpenAILlm(BaseLlm):
messages.append(SystemMessage(content=config.system_prompt))
messages.append(HumanMessage(content=prompt))
kwargs = {
"model": config.model or "gpt-3.5-turbo",
"model": config.model or "gpt-4o-mini",
"temperature": config.temperature,
"max_tokens": config.max_tokens,
"model_kwargs": config.model_kwargs or {},

View File

@@ -17,7 +17,7 @@ def embedchain_bot(db_path, api_key):
"llm": {
"provider": "openai",
"config": {
"model": "gpt-3.5-turbo-1106",
"model": "gpt-4o-mini",
"temperature": 0.5,
"max_tokens": 1000,
"top_p": 1,

View File

@@ -5,7 +5,7 @@ app:
llm:
provider: openai
config:
model: 'gpt-3.5-turbo'
model: 'gpt-4o-mini'
temperature: 0.5
max_tokens: 1000
top_p: 1

View File

@@ -80,7 +80,7 @@
" \"llm\": {\n",
" \"provider\": \"openai\",\n",
" \"config\": {\n",
" \"model\": \"gpt-3.5-turbo\",\n",
" \"model\": \"gpt-4o-mini\",\n",
" \"temperature\": 0.5,\n",
" \"max_tokens\": 1000,\n",
" \"top_p\": 1,\n",

View File

@@ -13,7 +13,7 @@ def azure_openai_llm():
config = BaseLlmConfig(
deployment_name="azure_deployment",
temperature=0.7,
model="gpt-3.5-turbo",
model="gpt-4o-mini",
max_tokens=50,
system_prompt="System Prompt",
)
@@ -40,7 +40,7 @@ def test_get_answer(azure_openai_llm):
mock_chat.assert_called_once_with(
deployment_name=azure_openai_llm.config.deployment_name,
openai_api_version="2024-02-01",
model_name=azure_openai_llm.config.model or "gpt-3.5-turbo",
model_name=azure_openai_llm.config.model or "gpt-4o-mini",
temperature=azure_openai_llm.config.temperature,
max_tokens=azure_openai_llm.config.max_tokens,
streaming=azure_openai_llm.config.stream,
@@ -60,7 +60,7 @@ def test_get_messages(azure_openai_llm):
def test_when_no_deployment_name_provided():
config = BaseLlmConfig(temperature=0.7, model="gpt-3.5-turbo", max_tokens=50, system_prompt="System Prompt")
config = BaseLlmConfig(temperature=0.7, model="gpt-4o-mini", max_tokens=50, system_prompt="System Prompt")
with pytest.raises(ValueError):
llm = AzureOpenAILlm(config)
llm.get_llm_model_answer("Test Prompt")
@@ -70,7 +70,7 @@ def test_with_api_version():
config = BaseLlmConfig(
deployment_name="azure_deployment",
temperature=0.7,
model="gpt-3.5-turbo",
model="gpt-4o-mini",
max_tokens=50,
system_prompt="System Prompt",
api_version="2024-02-01",
@@ -83,7 +83,7 @@ def test_with_api_version():
mock_chat.assert_called_once_with(
deployment_name="azure_deployment",
openai_api_version="2024-02-01",
model_name="gpt-3.5-turbo",
model_name="gpt-4o-mini",
temperature=0.7,
max_tokens=50,
streaming=False,
@@ -108,7 +108,7 @@ def test_get_llm_model_answer_with_http_client_proxies():
max_tokens=50,
stream=False,
system_prompt="System prompt",
model="gpt-3.5-turbo",
model="gpt-4o-mini",
http_client_proxies="http://testproxy.mem0.net:8000",
)
@@ -118,7 +118,7 @@ def test_get_llm_model_answer_with_http_client_proxies():
mock_chat.assert_called_once_with(
deployment_name="azure_deployment",
openai_api_version="2024-02-01",
model_name="gpt-3.5-turbo",
model_name="gpt-4o-mini",
temperature=0.7,
max_tokens=50,
streaming=False,
@@ -144,7 +144,7 @@ def test_get_llm_model_answer_with_http_async_client_proxies():
max_tokens=50,
stream=False,
system_prompt="System prompt",
model="gpt-3.5-turbo",
model="gpt-4o-mini",
http_async_client_proxies={"http://": "http://testproxy.mem0.net:8000"},
)
@@ -154,7 +154,7 @@ def test_get_llm_model_answer_with_http_async_client_proxies():
mock_chat.assert_called_once_with(
deployment_name="azure_deployment",
openai_api_version="2024-02-01",
model_name="gpt-3.5-turbo",
model_name="gpt-4o-mini",
temperature=0.7,
max_tokens=50,
streaming=False,

View File

@@ -24,7 +24,7 @@ def config(env_config):
top_p=0.8,
stream=False,
system_prompt="System prompt",
model="gpt-3.5-turbo",
model="gpt-4o-mini",
http_client_proxies=None,
http_async_client_proxies=None,
)
@@ -211,7 +211,7 @@ def test_get_llm_model_answer_with_http_client_proxies(env_config, mocker):
top_p=0.8,
stream=False,
system_prompt="System prompt",
model="gpt-3.5-turbo",
model="gpt-4o-mini",
http_client_proxies="http://testproxy.mem0.net:8000",
)
@@ -246,7 +246,7 @@ def test_get_llm_model_answer_with_http_async_client_proxies(env_config, mocker)
top_p=0.8,
stream=False,
system_prompt="System prompt",
model="gpt-3.5-turbo",
model="gpt-4o-mini",
http_async_client_proxies={"http://": "http://testproxy.mem0.net:8000"},
)