feat: openai default model uses gpt-4o-mini (#1526)
@@ -5,7 +5,7 @@ app:
 llm:
   provider: openai
   config:
-    model: 'gpt-3.5-turbo'
+    model: 'gpt-4o-mini'
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
@@ -10,7 +10,7 @@ chunker:
 llm:
   provider: openai
   config:
-    model: 'gpt-3.5-turbo'
+    model: 'gpt-4o-mini'
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
@@ -8,7 +8,7 @@ app:
 llm:
   provider: openai
   config:
-    model: 'gpt-3.5-turbo'
+    model: 'gpt-4o-mini'
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
@@ -20,7 +20,7 @@ app:
 llm:
   provider: openai
   config:
-    model: 'gpt-3.5-turbo'
+    model: 'gpt-4o-mini'
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
@@ -82,7 +82,7 @@ cache:
     "llm": {
         "provider": "openai",
         "config": {
-            "model": "gpt-3.5-turbo",
+            "model": "gpt-4o-mini",
             "temperature": 0.5,
             "max_tokens": 1000,
             "top_p": 1,
@@ -140,7 +140,7 @@ config = {
     'llm': {
         'provider': 'openai',
         'config': {
-            'model': 'gpt-3.5-turbo',
+            'model': 'gpt-4o-mini',
             'temperature': 0.5,
             'max_tokens': 1000,
             'top_p': 1,
@@ -206,7 +206,7 @@ Alright, let's dive into what each key means in the yaml config above:
 2. `llm` Section:
    - `provider` (String): The provider for the language model, which is set to 'openai'. You can find the full list of llm providers in [our docs](/components/llms).
    - `config`:
-     - `model` (String): The specific model being used, 'gpt-3.5-turbo'.
+     - `model` (String): The specific model being used, 'gpt-4o-mini'.
      - `temperature` (Float): Controls the randomness of the model's output. A higher value (closer to 1) makes the output more random.
      - `max_tokens` (Integer): Controls how many tokens are used in the response.
      - `top_p` (Float): Controls the diversity of word selection. A higher value (closer to 1) makes word selection more diverse.
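The keys documented above map directly onto the dict form of the same config. A minimal sketch, assuming embedchain's `App.from_config` also accepts an in-memory `config` dict (the `config = {` hunk earlier in this diff uses that form):

from embedchain import App

# Dict equivalent of the documented YAML, using the new default model.
config = {
    "llm": {
        "provider": "openai",
        "config": {
            "model": "gpt-4o-mini",
            "temperature": 0.5,
            "max_tokens": 1000,
            "top_p": 1,
        },
    },
}

app = App.from_config(config=config)  # the config= keyword is assumed; config_path="config.yaml" is what the docs above use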
@@ -62,7 +62,7 @@ app = App.from_config(config_path="config.yaml")
 llm:
   provider: openai
   config:
-    model: 'gpt-3.5-turbo'
+    model: 'gpt-4o-mini'
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
@@ -205,7 +205,7 @@ app = App.from_config(config_path="config.yaml")
 llm:
   provider: azure_openai
   config:
-    model: gpt-3.5-turbo
+    model: gpt-4o-mini
     deployment_name: your_llm_deployment_name
     temperature: 0.5
     max_tokens: 1000
@@ -887,7 +887,7 @@ response = app.chat("Which companies did Elon Musk found?")
 llm:
   provider: openai
   config:
-    model: gpt-3.5-turbo
+    model: gpt-4o-mini
     temperature: 0.5
     max_tokens: 1000
     token_usage: true
@@ -32,7 +32,7 @@ app:
 llm:
   provider: openai
   config:
-    model: "gpt-3.5-turbo"
+    model: "gpt-4o-mini"
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
@@ -122,7 +122,7 @@ You can achieve this by setting `stream` to `true` in the config file.
 llm:
   provider: openai
   config:
-    model: 'gpt-3.5-turbo'
+    model: 'gpt-4o-mini'
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
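The hunk above sits under a sentence about enabling streaming. As a hedged illustration only (assuming the flag belongs under `llm.config`, which is what that sentence describes), the dict form of the same config would add a stream key:

# Hypothetical dict-form counterpart of the streaming YAML config;
# `stream: true` in YAML corresponds to True here.
llm_config = {
    "provider": "openai",
    "config": {
        "model": "gpt-4o-mini",
        "temperature": 0.5,
        "max_tokens": 1000,
        "top_p": 1,
        "stream": True,
    },
}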
@@ -1,6 +1,6 @@
 {
     "openai/gpt-4": {
-        "max_tokens": 4096,
+        "max_tokens": 4096,
         "max_input_tokens": 8192,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.00003,
@@ -13,6 +13,20 @@
         "input_cost_per_token": 0.000005,
         "output_cost_per_token": 0.000015
     },
+    "gpt-4o-mini": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00000015,
+        "output_cost_per_token": 0.00000060
+    },
+    "gpt-4o-mini-2024-07-18": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00000015,
+        "output_cost_per_token": 0.00000060
+    },
     "openai/gpt-4o-2024-05-13": {
         "max_tokens": 4096,
         "max_input_tokens": 128000,
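The two new entries give gpt-4o-mini the same shape as the other rows of this pricing table: USD cost per token plus context limits. A quick back-of-the-envelope check of those numbers (values copied from the JSON above; the token counts are made up for illustration):

# Cost of a hypothetical request priced with the new "gpt-4o-mini" entry.
input_cost_per_token = 0.00000015   # USD per prompt token (from the JSON above)
output_cost_per_token = 0.00000060  # USD per completion token (from the JSON above)

prompt_tokens = 1_000_000
completion_tokens = 100_000

cost = prompt_tokens * input_cost_per_token + completion_tokens * output_cost_per_token
print(f"${cost:.2f}")  # 0.15 + 0.06 -> $0.21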
@@ -153,7 +167,7 @@
     "openai/text-embedding-ada-002": {
         "max_tokens": 8191,
         "max_input_tokens": 8191,
-        "output_vector_size": 1536,
+        "output_vector_size": 1536,
         "input_cost_per_token": 0.0000001,
         "output_cost_per_token": 0.000000
     },
@@ -176,7 +190,7 @@
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000002,
         "output_cost_per_token": 0.000002
-    },
+    },
     "openai/gpt-3.5-turbo-instruct": {
         "max_tokens": 4096,
         "max_input_tokens": 8192,
@@ -197,6 +211,13 @@
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000005,
         "output_cost_per_token": 0.000015
     },
+    "azure/gpt-4o-mini": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00000015,
+        "output_cost_per_token": 0.00000060
+    },
     "azure/gpt-4-turbo-2024-04-09": {
         "max_tokens": 4096,
@@ -325,7 +346,7 @@
         "max_input_tokens": 8191,
         "input_cost_per_token": 0.00000002,
         "output_cost_per_token": 0.000000
-    },
+    },
     "mistralai/mistral-tiny": {
         "max_tokens": 8191,
         "max_input_tokens": 32000,
@@ -595,77 +616,77 @@
         "max_tokens": 8192,
         "max_input_tokens": 32760,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.00000025,
+        "input_cost_per_token": 0.00000025,
         "output_cost_per_token": 0.0000005
     },
-    "vertexai/gemini-1.0-pro": {
+    "vertexai/gemini-1.0-pro": {
         "max_tokens": 8192,
         "max_input_tokens": 32760,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.00000025,
+        "input_cost_per_token": 0.00000025,
         "output_cost_per_token": 0.0000005
     },
-    "vertexai/gemini-1.0-pro-001": {
+    "vertexai/gemini-1.0-pro-001": {
         "max_tokens": 8192,
         "max_input_tokens": 32760,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.00000025,
+        "input_cost_per_token": 0.00000025,
         "output_cost_per_token": 0.0000005
     },
-    "vertexai/gemini-1.0-pro-002": {
+    "vertexai/gemini-1.0-pro-002": {
         "max_tokens": 8192,
         "max_input_tokens": 32760,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.00000025,
+        "input_cost_per_token": 0.00000025,
         "output_cost_per_token": 0.0000005
     },
-    "vertexai/gemini-1.5-pro": {
+    "vertexai/gemini-1.5-pro": {
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000000625,
+        "input_cost_per_token": 0.000000625,
         "output_cost_per_token": 0.000001875
     },
     "vertexai/gemini-1.5-flash-001": {
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0,
+        "input_cost_per_token": 0,
         "output_cost_per_token": 0
     },
     "vertexai/gemini-1.5-flash-preview-0514": {
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0,
+        "input_cost_per_token": 0,
         "output_cost_per_token": 0
     },
-    "vertexai/gemini-1.5-pro-001": {
+    "vertexai/gemini-1.5-pro-001": {
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000000625,
+        "input_cost_per_token": 0.000000625,
         "output_cost_per_token": 0.000001875
     },
-    "vertexai/gemini-1.5-pro-preview-0514": {
+    "vertexai/gemini-1.5-pro-preview-0514": {
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000000625,
+        "input_cost_per_token": 0.000000625,
         "output_cost_per_token": 0.000001875
     },
-    "vertexai/gemini-1.5-pro-preview-0215": {
+    "vertexai/gemini-1.5-pro-preview-0215": {
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000000625,
+        "input_cost_per_token": 0.000000625,
         "output_cost_per_token": 0.000001875
     },
     "vertexai/gemini-1.5-pro-preview-0409": {
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000000625,
+        "input_cost_per_token": 0.000000625,
         "output_cost_per_token": 0.000001875
     },
     "vertexai/gemini-experimental": {
@@ -682,7 +703,7 @@
         "max_images_per_prompt": 16,
         "max_videos_per_prompt": 1,
         "max_video_length": 2,
-        "input_cost_per_token": 0.00000025,
+        "input_cost_per_token": 0.00000025,
         "output_cost_per_token": 0.0000005
     },
     "vertexai/gemini-1.0-pro-vision": {
@@ -692,7 +713,7 @@
         "max_images_per_prompt": 16,
         "max_videos_per_prompt": 1,
         "max_video_length": 2,
-        "input_cost_per_token": 0.00000025,
+        "input_cost_per_token": 0.00000025,
         "output_cost_per_token": 0.0000005
     },
     "vertexai/gemini-1.0-pro-vision-001": {
@@ -702,7 +723,7 @@
         "max_images_per_prompt": 16,
         "max_videos_per_prompt": 1,
         "max_video_length": 2,
-        "input_cost_per_token": 0.00000025,
+        "input_cost_per_token": 0.00000025,
         "output_cost_per_token": 0.0000005
     },
     "vertexai/claude-3-sonnet@20240229": {
@@ -713,7 +734,7 @@
         "output_cost_per_token": 0.000015
     },
     "vertexai/claude-3-haiku@20240307": {
-        "max_tokens": 4096,
+        "max_tokens": 4096,
         "max_input_tokens": 200000,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.00000025,
@@ -727,49 +748,49 @@
         "output_cost_per_token": 0.000075
     },
     "cohere/command-r": {
-        "max_tokens": 4096,
+        "max_tokens": 4096,
         "max_input_tokens": 128000,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.00000050,
         "output_cost_per_token": 0.0000015
     },
     "cohere/command-light": {
-        "max_tokens": 4096,
+        "max_tokens": 4096,
         "max_input_tokens": 4096,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000015,
         "output_cost_per_token": 0.000015
     },
     "cohere/command-r-plus": {
-        "max_tokens": 4096,
+        "max_tokens": 4096,
         "max_input_tokens": 128000,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000015
     },
     "cohere/command-nightly": {
-        "max_tokens": 4096,
+        "max_tokens": 4096,
         "max_input_tokens": 4096,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000015,
         "output_cost_per_token": 0.000015
     },
     "cohere/command": {
-        "max_tokens": 4096,
+        "max_tokens": 4096,
         "max_input_tokens": 4096,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000015,
         "output_cost_per_token": 0.000015
     },
     "cohere/command-medium-beta": {
-        "max_tokens": 4096,
+        "max_tokens": 4096,
         "max_input_tokens": 4096,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000015,
         "output_cost_per_token": 0.000015
     },
     "cohere/command-xlarge-beta": {
-        "max_tokens": 4096,
+        "max_tokens": 4096,
         "max_input_tokens": 4096,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000015,
@@ -26,7 +26,7 @@ class AzureOpenAILlm(BaseLlm):
         chat = AzureChatOpenAI(
             deployment_name=config.deployment_name,
             openai_api_version=str(config.api_version) if config.api_version else "2024-02-01",
-            model_name=config.model or "gpt-3.5-turbo",
+            model_name=config.model or "gpt-4o-mini",
             temperature=config.temperature,
             max_tokens=config.max_tokens,
             streaming=config.stream,
@@ -52,7 +52,7 @@ class OpenAILlm(BaseLlm):
             messages.append(SystemMessage(content=config.system_prompt))
         messages.append(HumanMessage(content=prompt))
         kwargs = {
-            "model": config.model or "gpt-3.5-turbo",
+            "model": config.model or "gpt-4o-mini",
             "temperature": config.temperature,
             "max_tokens": config.max_tokens,
             "model_kwargs": config.model_kwargs or {},
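The `config.model or "gpt-4o-mini"` expression above is where the new default takes effect: the literal is only used when no model is configured. A minimal sketch of that fallback, assuming `BaseLlmConfig` is importable from `embedchain.config` and leaves `model` unset (None) when it is not passed:

from embedchain.config import BaseLlmConfig

config = BaseLlmConfig(temperature=0.5, max_tokens=1000)  # no model specified
model = config.model or "gpt-4o-mini"  # same fallback expression as in OpenAILlm above
print(model)  # "gpt-4o-mini" whenever config.model is falsy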
@@ -17,7 +17,7 @@ def embedchain_bot(db_path, api_key):
         "llm": {
             "provider": "openai",
             "config": {
-                "model": "gpt-3.5-turbo-1106",
+                "model": "gpt-4o-mini",
                 "temperature": 0.5,
                 "max_tokens": 1000,
                 "top_p": 1,
@@ -5,7 +5,7 @@ app:
 llm:
   provider: openai
   config:
-    model: 'gpt-3.5-turbo'
+    model: 'gpt-4o-mini'
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
@@ -80,7 +80,7 @@
     " \"llm\": {\n",
     " \"provider\": \"openai\",\n",
     " \"config\": {\n",
-    " \"model\": \"gpt-3.5-turbo\",\n",
+    " \"model\": \"gpt-4o-mini\",\n",
     " \"temperature\": 0.5,\n",
     " \"max_tokens\": 1000,\n",
     " \"top_p\": 1,\n",
@@ -13,7 +13,7 @@ def azure_openai_llm():
     config = BaseLlmConfig(
         deployment_name="azure_deployment",
         temperature=0.7,
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         max_tokens=50,
         system_prompt="System Prompt",
     )
@@ -40,7 +40,7 @@ def test_get_answer(azure_openai_llm):
         mock_chat.assert_called_once_with(
             deployment_name=azure_openai_llm.config.deployment_name,
             openai_api_version="2024-02-01",
-            model_name=azure_openai_llm.config.model or "gpt-3.5-turbo",
+            model_name=azure_openai_llm.config.model or "gpt-4o-mini",
             temperature=azure_openai_llm.config.temperature,
             max_tokens=azure_openai_llm.config.max_tokens,
             streaming=azure_openai_llm.config.stream,
@@ -60,7 +60,7 @@ def test_get_messages(azure_openai_llm):
 
 
 def test_when_no_deployment_name_provided():
-    config = BaseLlmConfig(temperature=0.7, model="gpt-3.5-turbo", max_tokens=50, system_prompt="System Prompt")
+    config = BaseLlmConfig(temperature=0.7, model="gpt-4o-mini", max_tokens=50, system_prompt="System Prompt")
     with pytest.raises(ValueError):
         llm = AzureOpenAILlm(config)
         llm.get_llm_model_answer("Test Prompt")
@@ -70,7 +70,7 @@ def test_with_api_version():
     config = BaseLlmConfig(
         deployment_name="azure_deployment",
         temperature=0.7,
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         max_tokens=50,
         system_prompt="System Prompt",
         api_version="2024-02-01",
@@ -83,7 +83,7 @@ def test_with_api_version():
         mock_chat.assert_called_once_with(
             deployment_name="azure_deployment",
             openai_api_version="2024-02-01",
-            model_name="gpt-3.5-turbo",
+            model_name="gpt-4o-mini",
             temperature=0.7,
             max_tokens=50,
             streaming=False,
@@ -108,7 +108,7 @@ def test_get_llm_model_answer_with_http_client_proxies():
         max_tokens=50,
         stream=False,
         system_prompt="System prompt",
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         http_client_proxies="http://testproxy.mem0.net:8000",
     )
 
@@ -118,7 +118,7 @@ def test_get_llm_model_answer_with_http_client_proxies():
         mock_chat.assert_called_once_with(
             deployment_name="azure_deployment",
             openai_api_version="2024-02-01",
-            model_name="gpt-3.5-turbo",
+            model_name="gpt-4o-mini",
             temperature=0.7,
             max_tokens=50,
             streaming=False,
@@ -144,7 +144,7 @@ def test_get_llm_model_answer_with_http_async_client_proxies():
         max_tokens=50,
         stream=False,
         system_prompt="System prompt",
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         http_async_client_proxies={"http://": "http://testproxy.mem0.net:8000"},
     )
 
@@ -154,7 +154,7 @@ def test_get_llm_model_answer_with_http_async_client_proxies():
         mock_chat.assert_called_once_with(
             deployment_name="azure_deployment",
             openai_api_version="2024-02-01",
-            model_name="gpt-3.5-turbo",
+            model_name="gpt-4o-mini",
             temperature=0.7,
             max_tokens=50,
             streaming=False,
@@ -24,7 +24,7 @@ def config(env_config):
         top_p=0.8,
         stream=False,
         system_prompt="System prompt",
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         http_client_proxies=None,
         http_async_client_proxies=None,
     )
@@ -211,7 +211,7 @@ def test_get_llm_model_answer_with_http_client_proxies(env_config, mocker):
         top_p=0.8,
         stream=False,
         system_prompt="System prompt",
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         http_client_proxies="http://testproxy.mem0.net:8000",
     )
 
@@ -246,7 +246,7 @@ def test_get_llm_model_answer_with_http_async_client_proxies(env_config, mocker)
         top_p=0.8,
         stream=False,
         system_prompt="System prompt",
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         http_async_client_proxies={"http://": "http://testproxy.mem0.net:8000"},
     )
 