feat: openai default model uses gpt-4o-mini (#1526)
@@ -5,7 +5,7 @@ app:
 llm:
   provider: openai
   config:
-    model: 'gpt-3.5-turbo'
+    model: 'gpt-4o-mini'
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
@@ -10,7 +10,7 @@ chunker:
 llm:
   provider: openai
   config:
-    model: 'gpt-3.5-turbo'
+    model: 'gpt-4o-mini'
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
@@ -8,7 +8,7 @@ app:
 llm:
   provider: openai
   config:
-    model: 'gpt-3.5-turbo'
+    model: 'gpt-4o-mini'
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
@@ -20,7 +20,7 @@ app:
 llm:
   provider: openai
   config:
-    model: 'gpt-3.5-turbo'
+    model: 'gpt-4o-mini'
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
@@ -82,7 +82,7 @@ cache:
     "llm": {
         "provider": "openai",
         "config": {
-            "model": "gpt-3.5-turbo",
+            "model": "gpt-4o-mini",
             "temperature": 0.5,
             "max_tokens": 1000,
             "top_p": 1,
@@ -140,7 +140,7 @@ config = {
     'llm': {
         'provider': 'openai',
         'config': {
-            'model': 'gpt-3.5-turbo',
+            'model': 'gpt-4o-mini',
             'temperature': 0.5,
             'max_tokens': 1000,
             'top_p': 1,
@@ -206,7 +206,7 @@ Alright, let's dive into what each key means in the yaml config above:
 2. `llm` Section:
    - `provider` (String): The provider for the language model, which is set to 'openai'. You can find the full list of llm providers in [our docs](/components/llms).
    - `config`:
-     - `model` (String): The specific model being used, 'gpt-3.5-turbo'.
+     - `model` (String): The specific model being used, 'gpt-4o-mini'.
      - `temperature` (Float): Controls the randomness of the model's output. A higher value (closer to 1) makes the output more random.
      - `max_tokens` (Integer): Controls how many tokens are used in the response.
      - `top_p` (Float): Controls the diversity of word selection. A higher value (closer to 1) makes word selection more diverse.
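The keys documented above map directly onto the dict form of the same config. A minimal sketch, assuming embedchain's `App.from_config` also accepts an in-memory `config` dict (the `config = {` hunk earlier in this diff uses that form):

from embedchain import App

# Dict equivalent of the documented YAML, using the new default model.
config = {
    "llm": {
        "provider": "openai",
        "config": {
            "model": "gpt-4o-mini",
            "temperature": 0.5,
            "max_tokens": 1000,
            "top_p": 1,
        },
    },
}

app = App.from_config(config=config)  # the config= keyword is assumed; config_path="config.yaml" is what the docs above use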
@@ -62,7 +62,7 @@ app = App.from_config(config_path="config.yaml")
 llm:
   provider: openai
   config:
-    model: 'gpt-3.5-turbo'
+    model: 'gpt-4o-mini'
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
@@ -205,7 +205,7 @@ app = App.from_config(config_path="config.yaml")
 llm:
   provider: azure_openai
   config:
-    model: gpt-3.5-turbo
+    model: gpt-4o-mini
     deployment_name: your_llm_deployment_name
     temperature: 0.5
     max_tokens: 1000
@@ -887,7 +887,7 @@ response = app.chat("Which companies did Elon Musk found?")
 llm:
   provider: openai
   config:
-    model: gpt-3.5-turbo
+    model: gpt-4o-mini
     temperature: 0.5
     max_tokens: 1000
     token_usage: true
@@ -32,7 +32,7 @@ app:
 llm:
   provider: openai
   config:
-    model: "gpt-3.5-turbo"
+    model: "gpt-4o-mini"
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
@@ -122,7 +122,7 @@ You can achieve this by setting `stream` to `true` in the config file.
 llm:
   provider: openai
   config:
-    model: 'gpt-3.5-turbo'
+    model: 'gpt-4o-mini'
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
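The hunk above sits under a sentence about enabling streaming. As a hedged illustration only (assuming the flag belongs under `llm.config`, which is what that sentence describes), the dict form of the same config would add a stream key:

# Hypothetical dict-form counterpart of the streaming YAML config;
# `stream: true` in YAML corresponds to True here.
llm_config = {
    "provider": "openai",
    "config": {
        "model": "gpt-4o-mini",
        "temperature": 0.5,
        "max_tokens": 1000,
        "top_p": 1,
        "stream": True,
    },
}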
@@ -1,6 +1,6 @@
 {
     "openai/gpt-4": {
-        "max_tokens": 4096,
+        "max_tokens": 4096,
         "max_input_tokens": 8192,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.00003,
@@ -13,6 +13,20 @@
         "input_cost_per_token": 0.000005,
         "output_cost_per_token": 0.000015
     },
+    "gpt-4o-mini": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00000015,
+        "output_cost_per_token": 0.00000060
+    },
+    "gpt-4o-mini-2024-07-18": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00000015,
+        "output_cost_per_token": 0.00000060
+    },
     "openai/gpt-4o-2024-05-13": {
         "max_tokens": 4096,
         "max_input_tokens": 128000,
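The two new entries give gpt-4o-mini the same shape as the other rows of this pricing table: USD cost per token plus context limits. A quick back-of-the-envelope check of those numbers (values copied from the JSON above; the token counts are made up for illustration):

# Cost of a hypothetical request priced with the new "gpt-4o-mini" entry.
input_cost_per_token = 0.00000015   # USD per prompt token (from the JSON above)
output_cost_per_token = 0.00000060  # USD per completion token (from the JSON above)

prompt_tokens = 1_000_000
completion_tokens = 100_000

cost = prompt_tokens * input_cost_per_token + completion_tokens * output_cost_per_token
print(f"${cost:.2f}")  # 0.15 + 0.06 -> $0.21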
@@ -153,7 +167,7 @@
     "openai/text-embedding-ada-002": {
         "max_tokens": 8191,
         "max_input_tokens": 8191,
-        "output_vector_size": 1536,
+        "output_vector_size": 1536,
         "input_cost_per_token": 0.0000001,
         "output_cost_per_token": 0.000000
     },
@@ -176,7 +190,7 @@
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000002,
         "output_cost_per_token": 0.000002
-    },
+    },
     "openai/gpt-3.5-turbo-instruct": {
         "max_tokens": 4096,
         "max_input_tokens": 8192,
@@ -197,6 +211,13 @@
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000005,
         "output_cost_per_token": 0.000015
     },
+    "azure/gpt-4o-mini": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00000015,
+        "output_cost_per_token": 0.00000060
+    },
     "azure/gpt-4-turbo-2024-04-09": {
         "max_tokens": 4096,
@@ -325,7 +346,7 @@
         "max_input_tokens": 8191,
         "input_cost_per_token": 0.00000002,
         "output_cost_per_token": 0.000000
-    },
+    },
     "mistralai/mistral-tiny": {
         "max_tokens": 8191,
         "max_input_tokens": 32000,
@@ -595,77 +616,77 @@
         "max_tokens": 8192,
         "max_input_tokens": 32760,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.00000025,
+        "input_cost_per_token": 0.00000025,
         "output_cost_per_token": 0.0000005
     },
-    "vertexai/gemini-1.0-pro": {
+    "vertexai/gemini-1.0-pro": {
         "max_tokens": 8192,
         "max_input_tokens": 32760,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.00000025,
+        "input_cost_per_token": 0.00000025,
         "output_cost_per_token": 0.0000005
     },
-    "vertexai/gemini-1.0-pro-001": {
+    "vertexai/gemini-1.0-pro-001": {
         "max_tokens": 8192,
         "max_input_tokens": 32760,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.00000025,
+        "input_cost_per_token": 0.00000025,
         "output_cost_per_token": 0.0000005
     },
-    "vertexai/gemini-1.0-pro-002": {
+    "vertexai/gemini-1.0-pro-002": {
         "max_tokens": 8192,
         "max_input_tokens": 32760,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.00000025,
+        "input_cost_per_token": 0.00000025,
         "output_cost_per_token": 0.0000005
     },
-    "vertexai/gemini-1.5-pro": {
+    "vertexai/gemini-1.5-pro": {
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000000625,
+        "input_cost_per_token": 0.000000625,
         "output_cost_per_token": 0.000001875
     },
     "vertexai/gemini-1.5-flash-001": {
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0,
+        "input_cost_per_token": 0,
         "output_cost_per_token": 0
     },
     "vertexai/gemini-1.5-flash-preview-0514": {
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0,
+        "input_cost_per_token": 0,
         "output_cost_per_token": 0
     },
-    "vertexai/gemini-1.5-pro-001": {
+    "vertexai/gemini-1.5-pro-001": {
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000000625,
+        "input_cost_per_token": 0.000000625,
         "output_cost_per_token": 0.000001875
     },
-    "vertexai/gemini-1.5-pro-preview-0514": {
+    "vertexai/gemini-1.5-pro-preview-0514": {
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000000625,
+        "input_cost_per_token": 0.000000625,
         "output_cost_per_token": 0.000001875
     },
-    "vertexai/gemini-1.5-pro-preview-0215": {
+    "vertexai/gemini-1.5-pro-preview-0215": {
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000000625,
+        "input_cost_per_token": 0.000000625,
         "output_cost_per_token": 0.000001875
     },
     "vertexai/gemini-1.5-pro-preview-0409": {
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000000625,
+        "input_cost_per_token": 0.000000625,
         "output_cost_per_token": 0.000001875
     },
     "vertexai/gemini-experimental": {
@@ -682,7 +703,7 @@
         "max_images_per_prompt": 16,
         "max_videos_per_prompt": 1,
         "max_video_length": 2,
-        "input_cost_per_token": 0.00000025,
+        "input_cost_per_token": 0.00000025,
         "output_cost_per_token": 0.0000005
     },
     "vertexai/gemini-1.0-pro-vision": {
@@ -692,7 +713,7 @@
         "max_images_per_prompt": 16,
         "max_videos_per_prompt": 1,
         "max_video_length": 2,
-        "input_cost_per_token": 0.00000025,
+        "input_cost_per_token": 0.00000025,
         "output_cost_per_token": 0.0000005
     },
     "vertexai/gemini-1.0-pro-vision-001": {
@@ -702,7 +723,7 @@
         "max_images_per_prompt": 16,
         "max_videos_per_prompt": 1,
         "max_video_length": 2,
-        "input_cost_per_token": 0.00000025,
+        "input_cost_per_token": 0.00000025,
         "output_cost_per_token": 0.0000005
     },
     "vertexai/claude-3-sonnet@20240229": {
@@ -713,7 +734,7 @@
         "output_cost_per_token": 0.000015
     },
     "vertexai/claude-3-haiku@20240307": {
-        "max_tokens": 4096,
+        "max_tokens": 4096,
         "max_input_tokens": 200000,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.00000025,
@@ -727,49 +748,49 @@
         "output_cost_per_token": 0.000075
     },
     "cohere/command-r": {
-        "max_tokens": 4096,
+        "max_tokens": 4096,
         "max_input_tokens": 128000,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.00000050,
         "output_cost_per_token": 0.0000015
     },
     "cohere/command-light": {
-        "max_tokens": 4096,
+        "max_tokens": 4096,
         "max_input_tokens": 4096,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000015,
         "output_cost_per_token": 0.000015
     },
     "cohere/command-r-plus": {
-        "max_tokens": 4096,
+        "max_tokens": 4096,
         "max_input_tokens": 128000,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000015
     },
     "cohere/command-nightly": {
-        "max_tokens": 4096,
+        "max_tokens": 4096,
         "max_input_tokens": 4096,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000015,
         "output_cost_per_token": 0.000015
     },
     "cohere/command": {
-        "max_tokens": 4096,
+        "max_tokens": 4096,
         "max_input_tokens": 4096,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000015,
         "output_cost_per_token": 0.000015
     },
     "cohere/command-medium-beta": {
-        "max_tokens": 4096,
+        "max_tokens": 4096,
         "max_input_tokens": 4096,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000015,
         "output_cost_per_token": 0.000015
     },
     "cohere/command-xlarge-beta": {
-        "max_tokens": 4096,
+        "max_tokens": 4096,
         "max_input_tokens": 4096,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000015,
@@ -26,7 +26,7 @@ class AzureOpenAILlm(BaseLlm):
         chat = AzureChatOpenAI(
             deployment_name=config.deployment_name,
             openai_api_version=str(config.api_version) if config.api_version else "2024-02-01",
-            model_name=config.model or "gpt-3.5-turbo",
+            model_name=config.model or "gpt-4o-mini",
             temperature=config.temperature,
             max_tokens=config.max_tokens,
             streaming=config.stream,
@@ -52,7 +52,7 @@ class OpenAILlm(BaseLlm):
             messages.append(SystemMessage(content=config.system_prompt))
         messages.append(HumanMessage(content=prompt))
         kwargs = {
-            "model": config.model or "gpt-3.5-turbo",
+            "model": config.model or "gpt-4o-mini",
             "temperature": config.temperature,
             "max_tokens": config.max_tokens,
             "model_kwargs": config.model_kwargs or {},
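The `config.model or "gpt-4o-mini"` expression above is where the new default takes effect: the literal is only used when no model is configured. A minimal sketch of that fallback, assuming `BaseLlmConfig` is importable from `embedchain.config` and leaves `model` unset (None) when it is not passed:

from embedchain.config import BaseLlmConfig

config = BaseLlmConfig(temperature=0.5, max_tokens=1000)  # no model specified
model = config.model or "gpt-4o-mini"  # same fallback expression as in OpenAILlm above
print(model)  # "gpt-4o-mini" whenever config.model is falsy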
@@ -17,7 +17,7 @@ def embedchain_bot(db_path, api_key):
         "llm": {
             "provider": "openai",
             "config": {
-                "model": "gpt-3.5-turbo-1106",
+                "model": "gpt-4o-mini",
                 "temperature": 0.5,
                 "max_tokens": 1000,
                 "top_p": 1,
@@ -5,7 +5,7 @@ app:
 llm:
   provider: openai
   config:
-    model: 'gpt-3.5-turbo'
+    model: 'gpt-4o-mini'
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
@@ -80,7 +80,7 @@
     " \"llm\": {\n",
     " \"provider\": \"openai\",\n",
     " \"config\": {\n",
-    " \"model\": \"gpt-3.5-turbo\",\n",
+    " \"model\": \"gpt-4o-mini\",\n",
     " \"temperature\": 0.5,\n",
     " \"max_tokens\": 1000,\n",
     " \"top_p\": 1,\n",
@@ -13,7 +13,7 @@ def azure_openai_llm():
     config = BaseLlmConfig(
         deployment_name="azure_deployment",
         temperature=0.7,
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         max_tokens=50,
         system_prompt="System Prompt",
     )
@@ -40,7 +40,7 @@ def test_get_answer(azure_openai_llm):
         mock_chat.assert_called_once_with(
             deployment_name=azure_openai_llm.config.deployment_name,
             openai_api_version="2024-02-01",
-            model_name=azure_openai_llm.config.model or "gpt-3.5-turbo",
+            model_name=azure_openai_llm.config.model or "gpt-4o-mini",
             temperature=azure_openai_llm.config.temperature,
             max_tokens=azure_openai_llm.config.max_tokens,
             streaming=azure_openai_llm.config.stream,
@@ -60,7 +60,7 @@ def test_get_messages(azure_openai_llm):
 
 
 def test_when_no_deployment_name_provided():
-    config = BaseLlmConfig(temperature=0.7, model="gpt-3.5-turbo", max_tokens=50, system_prompt="System Prompt")
+    config = BaseLlmConfig(temperature=0.7, model="gpt-4o-mini", max_tokens=50, system_prompt="System Prompt")
     with pytest.raises(ValueError):
         llm = AzureOpenAILlm(config)
         llm.get_llm_model_answer("Test Prompt")
@@ -70,7 +70,7 @@ def test_with_api_version():
     config = BaseLlmConfig(
         deployment_name="azure_deployment",
         temperature=0.7,
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         max_tokens=50,
         system_prompt="System Prompt",
         api_version="2024-02-01",
@@ -83,7 +83,7 @@ def test_with_api_version():
         mock_chat.assert_called_once_with(
             deployment_name="azure_deployment",
             openai_api_version="2024-02-01",
-            model_name="gpt-3.5-turbo",
+            model_name="gpt-4o-mini",
             temperature=0.7,
             max_tokens=50,
             streaming=False,
@@ -108,7 +108,7 @@ def test_get_llm_model_answer_with_http_client_proxies():
         max_tokens=50,
         stream=False,
         system_prompt="System prompt",
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         http_client_proxies="http://testproxy.mem0.net:8000",
     )
 
@@ -118,7 +118,7 @@ def test_get_llm_model_answer_with_http_client_proxies():
         mock_chat.assert_called_once_with(
             deployment_name="azure_deployment",
             openai_api_version="2024-02-01",
-            model_name="gpt-3.5-turbo",
+            model_name="gpt-4o-mini",
             temperature=0.7,
             max_tokens=50,
             streaming=False,
@@ -144,7 +144,7 @@ def test_get_llm_model_answer_with_http_async_client_proxies():
         max_tokens=50,
         stream=False,
         system_prompt="System prompt",
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         http_async_client_proxies={"http://": "http://testproxy.mem0.net:8000"},
     )
 
@@ -154,7 +154,7 @@ def test_get_llm_model_answer_with_http_async_client_proxies():
         mock_chat.assert_called_once_with(
             deployment_name="azure_deployment",
             openai_api_version="2024-02-01",
-            model_name="gpt-3.5-turbo",
+            model_name="gpt-4o-mini",
             temperature=0.7,
             max_tokens=50,
             streaming=False,
@@ -24,7 +24,7 @@ def config(env_config):
         top_p=0.8,
         stream=False,
         system_prompt="System prompt",
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         http_client_proxies=None,
         http_async_client_proxies=None,
     )
@@ -211,7 +211,7 @@ def test_get_llm_model_answer_with_http_client_proxies(env_config, mocker):
         top_p=0.8,
         stream=False,
         system_prompt="System prompt",
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         http_client_proxies="http://testproxy.mem0.net:8000",
     )
 
@@ -246,7 +246,7 @@ def test_get_llm_model_answer_with_http_async_client_proxies(env_config, mocker)
         top_p=0.8,
         stream=False,
         system_prompt="System prompt",
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         http_async_client_proxies={"http://": "http://testproxy.mem0.net:8000"},
     )
 