feat: openai default model uses gpt-4o-mini (#1526)

This commit is contained in:
Kirk Lin
2024-09-09 15:28:28 +08:00
committed by GitHub
parent bf0cf2d9c4
commit 7170edd13f
18 changed files with 88 additions and 67 deletions

View File

@@ -1,6 +1,6 @@
{
"openai/gpt-4": {
"max_tokens": 4096,
"max_tokens": 4096,
"max_input_tokens": 8192,
"max_output_tokens": 4096,
"input_cost_per_token": 0.00003,
@@ -13,6 +13,20 @@
"input_cost_per_token": 0.000005,
"output_cost_per_token": 0.000015
},
+"gpt-4o-mini": {
+"max_tokens": 4096,
+"max_input_tokens": 128000,
+"max_output_tokens": 4096,
+"input_cost_per_token": 0.00000015,
+"output_cost_per_token": 0.00000060
+},
+"gpt-4o-mini-2024-07-18": {
+"max_tokens": 4096,
+"max_input_tokens": 128000,
+"max_output_tokens": 4096,
+"input_cost_per_token": 0.00000015,
+"output_cost_per_token": 0.00000060
+},
"openai/gpt-4o-2024-05-13": {
"max_tokens": 4096,
"max_input_tokens": 128000,
@@ -153,7 +167,7 @@
"openai/text-embedding-ada-002": {
"max_tokens": 8191,
"max_input_tokens": 8191,
"output_vector_size": 1536,
"output_vector_size": 1536,
"input_cost_per_token": 0.0000001,
"output_cost_per_token": 0.000000
},
@@ -176,7 +190,7 @@
"max_output_tokens": 4096,
"input_cost_per_token": 0.000002,
"output_cost_per_token": 0.000002
},
},
"openai/gpt-3.5-turbo-instruct": {
"max_tokens": 4096,
"max_input_tokens": 8192,
@@ -197,6 +211,13 @@
"max_output_tokens": 4096,
"input_cost_per_token": 0.000005,
"output_cost_per_token": 0.000015
},
+"azure/gpt-4o-mini": {
+"max_tokens": 4096,
+"max_input_tokens": 128000,
+"max_output_tokens": 4096,
+"input_cost_per_token": 0.00000015,
+"output_cost_per_token": 0.00000060
+},
"azure/gpt-4-turbo-2024-04-09": {
"max_tokens": 4096,
@@ -325,7 +346,7 @@
"max_input_tokens": 8191,
"input_cost_per_token": 0.00000002,
"output_cost_per_token": 0.000000
},
},
"mistralai/mistral-tiny": {
"max_tokens": 8191,
"max_input_tokens": 32000,
@@ -595,77 +616,77 @@
"max_tokens": 8192,
"max_input_tokens": 32760,
"max_output_tokens": 8192,
"input_cost_per_token": 0.00000025,
"input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.0000005
},
"vertexai/gemini-1.0-pro": {
"vertexai/gemini-1.0-pro": {
"max_tokens": 8192,
"max_input_tokens": 32760,
"max_output_tokens": 8192,
"input_cost_per_token": 0.00000025,
"input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.0000005
},
"vertexai/gemini-1.0-pro-001": {
"vertexai/gemini-1.0-pro-001": {
"max_tokens": 8192,
"max_input_tokens": 32760,
"max_output_tokens": 8192,
"input_cost_per_token": 0.00000025,
"input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.0000005
},
"vertexai/gemini-1.0-pro-002": {
"vertexai/gemini-1.0-pro-002": {
"max_tokens": 8192,
"max_input_tokens": 32760,
"max_output_tokens": 8192,
"input_cost_per_token": 0.00000025,
"input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.0000005
},
"vertexai/gemini-1.5-pro": {
"vertexai/gemini-1.5-pro": {
"max_tokens": 8192,
"max_input_tokens": 1000000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000000625,
"input_cost_per_token": 0.000000625,
"output_cost_per_token": 0.000001875
},
"vertexai/gemini-1.5-flash-001": {
"max_tokens": 8192,
"max_input_tokens": 1000000,
"max_output_tokens": 8192,
"input_cost_per_token": 0,
"input_cost_per_token": 0,
"output_cost_per_token": 0
},
"vertexai/gemini-1.5-flash-preview-0514": {
"max_tokens": 8192,
"max_input_tokens": 1000000,
"max_output_tokens": 8192,
"input_cost_per_token": 0,
"input_cost_per_token": 0,
"output_cost_per_token": 0
},
"vertexai/gemini-1.5-pro-001": {
"vertexai/gemini-1.5-pro-001": {
"max_tokens": 8192,
"max_input_tokens": 1000000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000000625,
"input_cost_per_token": 0.000000625,
"output_cost_per_token": 0.000001875
},
"vertexai/gemini-1.5-pro-preview-0514": {
"vertexai/gemini-1.5-pro-preview-0514": {
"max_tokens": 8192,
"max_input_tokens": 1000000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000000625,
"input_cost_per_token": 0.000000625,
"output_cost_per_token": 0.000001875
},
"vertexai/gemini-1.5-pro-preview-0215": {
"vertexai/gemini-1.5-pro-preview-0215": {
"max_tokens": 8192,
"max_input_tokens": 1000000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000000625,
"input_cost_per_token": 0.000000625,
"output_cost_per_token": 0.000001875
},
"vertexai/gemini-1.5-pro-preview-0409": {
"max_tokens": 8192,
"max_input_tokens": 1000000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000000625,
"input_cost_per_token": 0.000000625,
"output_cost_per_token": 0.000001875
},
"vertexai/gemini-experimental": {
@@ -682,7 +703,7 @@
"max_images_per_prompt": 16,
"max_videos_per_prompt": 1,
"max_video_length": 2,
"input_cost_per_token": 0.00000025,
"input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.0000005
},
"vertexai/gemini-1.0-pro-vision": {
@@ -692,7 +713,7 @@
"max_images_per_prompt": 16,
"max_videos_per_prompt": 1,
"max_video_length": 2,
"input_cost_per_token": 0.00000025,
"input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.0000005
},
"vertexai/gemini-1.0-pro-vision-001": {
@@ -702,7 +723,7 @@
"max_images_per_prompt": 16,
"max_videos_per_prompt": 1,
"max_video_length": 2,
"input_cost_per_token": 0.00000025,
"input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.0000005
},
"vertexai/claude-3-sonnet@20240229": {
@@ -713,7 +734,7 @@
"output_cost_per_token": 0.000015
},
"vertexai/claude-3-haiku@20240307": {
"max_tokens": 4096,
"max_tokens": 4096,
"max_input_tokens": 200000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.00000025,
@@ -727,49 +748,49 @@
"output_cost_per_token": 0.000075
},
"cohere/command-r": {
"max_tokens": 4096,
"max_tokens": 4096,
"max_input_tokens": 128000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.00000050,
"output_cost_per_token": 0.0000015
},
"cohere/command-light": {
"max_tokens": 4096,
"max_tokens": 4096,
"max_input_tokens": 4096,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000015,
"output_cost_per_token": 0.000015
},
"cohere/command-r-plus": {
"max_tokens": 4096,
"max_tokens": 4096,
"max_input_tokens": 128000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000015
},
"cohere/command-nightly": {
"max_tokens": 4096,
"max_tokens": 4096,
"max_input_tokens": 4096,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000015,
"output_cost_per_token": 0.000015
},
"cohere/command": {
"max_tokens": 4096,
"max_tokens": 4096,
"max_input_tokens": 4096,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000015,
"output_cost_per_token": 0.000015
},
"cohere/command-medium-beta": {
"max_tokens": 4096,
"max_tokens": 4096,
"max_input_tokens": 4096,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000015,
"output_cost_per_token": 0.000015
},
"cohere/command-xlarge-beta": {
"max_tokens": 4096,
"max_tokens": 4096,
"max_input_tokens": 4096,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000015,

View File

@@ -26,7 +26,7 @@ class AzureOpenAILlm(BaseLlm):
chat = AzureChatOpenAI(
deployment_name=config.deployment_name,
openai_api_version=str(config.api_version) if config.api_version else "2024-02-01",
-model_name=config.model or "gpt-3.5-turbo",
+model_name=config.model or "gpt-4o-mini",
temperature=config.temperature,
max_tokens=config.max_tokens,
streaming=config.stream,

View File

@@ -52,7 +52,7 @@ class OpenAILlm(BaseLlm):
messages.append(SystemMessage(content=config.system_prompt))
messages.append(HumanMessage(content=prompt))
kwargs = {
-"model": config.model or "gpt-3.5-turbo",
+"model": config.model or "gpt-4o-mini",
"temperature": config.temperature,
"max_tokens": config.max_tokens,
"model_kwargs": config.model_kwargs or {},