diff --git a/docs/components/llms/models/litellm.mdx b/docs/components/llms/models/litellm.mdx
index c0db85cd..4a6f47cd 100644
--- a/docs/components/llms/models/litellm.mdx
+++ b/docs/components/llms/models/litellm.mdx
@@ -12,7 +12,7 @@ config = {
     "llm": {
         "provider": "litellm",
         "config": {
-            "model": "gpt-3.5-turbo",
+            "model": "gpt-4o-mini",
             "temperature": 0.2,
             "max_tokens": 1500,
         }
diff --git a/embedchain/configs/chroma.yaml b/embedchain/configs/chroma.yaml
index 3d1e4359..142eb05f 100644
--- a/embedchain/configs/chroma.yaml
+++ b/embedchain/configs/chroma.yaml
@@ -5,7 +5,7 @@ app:
 llm:
   provider: openai
   config:
-    model: 'gpt-3.5-turbo'
+    model: 'gpt-4o-mini'
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
diff --git a/embedchain/configs/full-stack.yaml b/embedchain/configs/full-stack.yaml
index dc337957..978722ea 100644
--- a/embedchain/configs/full-stack.yaml
+++ b/embedchain/configs/full-stack.yaml
@@ -10,7 +10,7 @@ chunker:
 llm:
   provider: openai
   config:
-    model: 'gpt-3.5-turbo'
+    model: 'gpt-4o-mini'
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
diff --git a/embedchain/configs/opensearch.yaml b/embedchain/configs/opensearch.yaml
index 4918775e..94a27b29 100644
--- a/embedchain/configs/opensearch.yaml
+++ b/embedchain/configs/opensearch.yaml
@@ -8,7 +8,7 @@ app:
 llm:
   provider: openai
   config:
-    model: 'gpt-3.5-turbo'
+    model: 'gpt-4o-mini'
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
diff --git a/embedchain/docs/api-reference/advanced/configuration.mdx b/embedchain/docs/api-reference/advanced/configuration.mdx
index 2949ec45..83896358 100644
--- a/embedchain/docs/api-reference/advanced/configuration.mdx
+++ b/embedchain/docs/api-reference/advanced/configuration.mdx
@@ -20,7 +20,7 @@ app:
 llm:
   provider: openai
   config:
-    model: 'gpt-3.5-turbo'
+    model: 'gpt-4o-mini'
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
@@ -82,7 +82,7 @@ cache:
   "llm": {
     "provider": "openai",
     "config": {
-      "model": "gpt-3.5-turbo",
+      "model": "gpt-4o-mini",
       "temperature": 0.5,
       "max_tokens": 1000,
       "top_p": 1,
@@ -140,7 +140,7 @@ config = {
   'llm': {
     'provider': 'openai',
     'config': {
-      'model': 'gpt-3.5-turbo',
+      'model': 'gpt-4o-mini',
       'temperature': 0.5,
       'max_tokens': 1000,
       'top_p': 1,
@@ -206,7 +206,7 @@ Alright, let's dive into what each key means in the yaml config above:
 2. `llm` Section:
   - `provider` (String): The provider for the language model, which is set to 'openai'. You can find the full list of llm providers in [our docs](/components/llms).
   - `config`:
-    - `model` (String): The specific model being used, 'gpt-3.5-turbo'.
+    - `model` (String): The specific model being used, 'gpt-4o-mini'.
     - `temperature` (Float): Controls the randomness of the model's output. A higher value (closer to 1) makes the output more random.
     - `max_tokens` (Integer): Controls how many tokens are used in the response.
     - `top_p` (Float): Controls the diversity of word selection. A higher value (closer to 1) makes word selection more diverse.
diff --git a/embedchain/docs/components/llms.mdx b/embedchain/docs/components/llms.mdx
index c2cae52c..c0034dc1 100644
--- a/embedchain/docs/components/llms.mdx
+++ b/embedchain/docs/components/llms.mdx
@@ -62,7 +62,7 @@ app = App.from_config(config_path="config.yaml")
 llm:
   provider: openai
   config:
-    model: 'gpt-3.5-turbo'
+    model: 'gpt-4o-mini'
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
@@ -205,7 +205,7 @@ app = App.from_config(config_path="config.yaml")
 llm:
   provider: azure_openai
   config:
-    model: gpt-3.5-turbo
+    model: gpt-4o-mini
     deployment_name: your_llm_deployment_name
     temperature: 0.5
     max_tokens: 1000
@@ -887,7 +887,7 @@ response = app.chat("Which companies did Elon Musk found?")
 llm:
   provider: openai
   config:
-    model: gpt-3.5-turbo
+    model: gpt-4o-mini
     temperature: 0.5
     max_tokens: 1000
     token_usage: true
diff --git a/embedchain/docs/examples/rest-api/create.mdx b/embedchain/docs/examples/rest-api/create.mdx
index 6736bf7a..35863cea 100644
--- a/embedchain/docs/examples/rest-api/create.mdx
+++ b/embedchain/docs/examples/rest-api/create.mdx
@@ -32,7 +32,7 @@ app:
 llm:
   provider: openai
   config:
-    model: "gpt-3.5-turbo"
+    model: "gpt-4o-mini"
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
diff --git a/embedchain/docs/get-started/faq.mdx b/embedchain/docs/get-started/faq.mdx
index fd00258c..3acae267 100644
--- a/embedchain/docs/get-started/faq.mdx
+++ b/embedchain/docs/get-started/faq.mdx
@@ -122,7 +122,7 @@ You can achieve this by setting `stream` to `true` in the config file.
 llm:
   provider: openai
   config:
-    model: 'gpt-3.5-turbo'
+    model: 'gpt-4o-mini'
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
diff --git a/embedchain/embedchain/config/model_prices_and_context_window.json b/embedchain/embedchain/config/model_prices_and_context_window.json
index 1a9b6e4d..744a09c0 100644
--- a/embedchain/embedchain/config/model_prices_and_context_window.json
+++ b/embedchain/embedchain/config/model_prices_and_context_window.json
@@ -1,6 +1,6 @@
 {
   "openai/gpt-4": {
-    "max_tokens": 4096, 
+    "max_tokens": 4096,
     "max_input_tokens": 8192,
     "max_output_tokens": 4096,
     "input_cost_per_token": 0.00003,
@@ -13,6 +13,20 @@
     "input_cost_per_token": 0.000005,
     "output_cost_per_token": 0.000015
   },
+  "gpt-4o-mini": {
+    "max_tokens": 4096,
+    "max_input_tokens": 128000,
+    "max_output_tokens": 4096,
+    "input_cost_per_token": 0.00000015,
+    "output_cost_per_token": 0.00000060
+  },
+  "gpt-4o-mini-2024-07-18": {
+    "max_tokens": 4096,
+    "max_input_tokens": 128000,
+    "max_output_tokens": 4096,
+    "input_cost_per_token": 0.00000015,
+    "output_cost_per_token": 0.00000060
+  },
   "openai/gpt-4o-2024-05-13": {
     "max_tokens": 4096,
     "max_input_tokens": 128000,
@@ -153,7 +167,7 @@
   "openai/text-embedding-ada-002": {
     "max_tokens": 8191,
     "max_input_tokens": 8191,
-    "output_vector_size": 1536, 
+    "output_vector_size": 1536,
     "input_cost_per_token": 0.0000001,
     "output_cost_per_token": 0.000000
   },
@@ -176,7 +190,7 @@
     "max_output_tokens": 4096,
     "input_cost_per_token": 0.000002,
     "output_cost_per_token": 0.000002
-  }, 
+  },
   "openai/gpt-3.5-turbo-instruct": {
     "max_tokens": 4096,
     "max_input_tokens": 8192,
@@ -197,6 +211,13 @@
     "max_output_tokens": 4096,
     "input_cost_per_token": 0.000005,
     "output_cost_per_token": 0.000015
+  },
+  "azure/gpt-4o-mini": {
+    "max_tokens": 4096,
+    "max_input_tokens": 128000,
+    "max_output_tokens": 4096,
+    "input_cost_per_token": 0.00000015,
+    "output_cost_per_token": 0.00000060
   },
   "azure/gpt-4-turbo-2024-04-09": {
     "max_tokens": 4096,
@@ -325,7 +346,7 @@
     "max_input_tokens": 8191,
     "input_cost_per_token": 0.00000002,
"output_cost_per_token": 0.000000 - }, + }, "mistralai/mistral-tiny": { "max_tokens": 8191, "max_input_tokens": 32000, @@ -595,77 +616,77 @@ "max_tokens": 8192, "max_input_tokens": 32760, "max_output_tokens": 8192, - "input_cost_per_token": 0.00000025, + "input_cost_per_token": 0.00000025, "output_cost_per_token": 0.0000005 }, - "vertexai/gemini-1.0-pro": { + "vertexai/gemini-1.0-pro": { "max_tokens": 8192, "max_input_tokens": 32760, "max_output_tokens": 8192, - "input_cost_per_token": 0.00000025, + "input_cost_per_token": 0.00000025, "output_cost_per_token": 0.0000005 }, - "vertexai/gemini-1.0-pro-001": { + "vertexai/gemini-1.0-pro-001": { "max_tokens": 8192, "max_input_tokens": 32760, "max_output_tokens": 8192, - "input_cost_per_token": 0.00000025, + "input_cost_per_token": 0.00000025, "output_cost_per_token": 0.0000005 }, - "vertexai/gemini-1.0-pro-002": { + "vertexai/gemini-1.0-pro-002": { "max_tokens": 8192, "max_input_tokens": 32760, "max_output_tokens": 8192, - "input_cost_per_token": 0.00000025, + "input_cost_per_token": 0.00000025, "output_cost_per_token": 0.0000005 }, - "vertexai/gemini-1.5-pro": { + "vertexai/gemini-1.5-pro": { "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_token": 0.000000625, + "input_cost_per_token": 0.000000625, "output_cost_per_token": 0.000001875 }, "vertexai/gemini-1.5-flash-001": { "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_token": 0, + "input_cost_per_token": 0, "output_cost_per_token": 0 }, "vertexai/gemini-1.5-flash-preview-0514": { "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_token": 0, + "input_cost_per_token": 0, "output_cost_per_token": 0 }, - "vertexai/gemini-1.5-pro-001": { + "vertexai/gemini-1.5-pro-001": { "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_token": 0.000000625, + "input_cost_per_token": 0.000000625, "output_cost_per_token": 0.000001875 }, - "vertexai/gemini-1.5-pro-preview-0514": { + "vertexai/gemini-1.5-pro-preview-0514": { "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_token": 0.000000625, + "input_cost_per_token": 0.000000625, "output_cost_per_token": 0.000001875 }, - "vertexai/gemini-1.5-pro-preview-0215": { + "vertexai/gemini-1.5-pro-preview-0215": { "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_token": 0.000000625, + "input_cost_per_token": 0.000000625, "output_cost_per_token": 0.000001875 }, "vertexai/gemini-1.5-pro-preview-0409": { "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_token": 0.000000625, + "input_cost_per_token": 0.000000625, "output_cost_per_token": 0.000001875 }, "vertexai/gemini-experimental": { @@ -682,7 +703,7 @@ "max_images_per_prompt": 16, "max_videos_per_prompt": 1, "max_video_length": 2, - "input_cost_per_token": 0.00000025, + "input_cost_per_token": 0.00000025, "output_cost_per_token": 0.0000005 }, "vertexai/gemini-1.0-pro-vision": { @@ -692,7 +713,7 @@ "max_images_per_prompt": 16, "max_videos_per_prompt": 1, "max_video_length": 2, - "input_cost_per_token": 0.00000025, + "input_cost_per_token": 0.00000025, "output_cost_per_token": 0.0000005 }, "vertexai/gemini-1.0-pro-vision-001": { @@ -702,7 +723,7 @@ "max_images_per_prompt": 16, "max_videos_per_prompt": 1, "max_video_length": 2, - "input_cost_per_token": 0.00000025, + "input_cost_per_token": 0.00000025, 
"output_cost_per_token": 0.0000005 }, "vertexai/claude-3-sonnet@20240229": { @@ -713,7 +734,7 @@ "output_cost_per_token": 0.000015 }, "vertexai/claude-3-haiku@20240307": { - "max_tokens": 4096, + "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, "input_cost_per_token": 0.00000025, @@ -727,49 +748,49 @@ "output_cost_per_token": 0.000075 }, "cohere/command-r": { - "max_tokens": 4096, + "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.00000050, "output_cost_per_token": 0.0000015 }, "cohere/command-light": { - "max_tokens": 4096, + "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015 }, "cohere/command-r-plus": { - "max_tokens": 4096, + "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000015 }, "cohere/command-nightly": { - "max_tokens": 4096, + "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015 }, "cohere/command": { - "max_tokens": 4096, + "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015 }, "cohere/command-medium-beta": { - "max_tokens": 4096, + "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015 }, "cohere/command-xlarge-beta": { - "max_tokens": 4096, + "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, "input_cost_per_token": 0.000015, diff --git a/embedchain/embedchain/llm/azure_openai.py b/embedchain/embedchain/llm/azure_openai.py index eea4e5f8..c219270a 100644 --- a/embedchain/embedchain/llm/azure_openai.py +++ b/embedchain/embedchain/llm/azure_openai.py @@ -26,7 +26,7 @@ class AzureOpenAILlm(BaseLlm): chat = AzureChatOpenAI( deployment_name=config.deployment_name, openai_api_version=str(config.api_version) if config.api_version else "2024-02-01", - model_name=config.model or "gpt-3.5-turbo", + model_name=config.model or "gpt-4o-mini", temperature=config.temperature, max_tokens=config.max_tokens, streaming=config.stream, diff --git a/embedchain/embedchain/llm/openai.py b/embedchain/embedchain/llm/openai.py index 316854e7..a595ee60 100644 --- a/embedchain/embedchain/llm/openai.py +++ b/embedchain/embedchain/llm/openai.py @@ -52,7 +52,7 @@ class OpenAILlm(BaseLlm): messages.append(SystemMessage(content=config.system_prompt)) messages.append(HumanMessage(content=prompt)) kwargs = { - "model": config.model or "gpt-3.5-turbo", + "model": config.model or "gpt-4o-mini", "temperature": config.temperature, "max_tokens": config.max_tokens, "model_kwargs": config.model_kwargs or {}, diff --git a/embedchain/examples/chat-pdf/app.py b/embedchain/examples/chat-pdf/app.py index fb29b73e..73800605 100644 --- a/embedchain/examples/chat-pdf/app.py +++ b/embedchain/examples/chat-pdf/app.py @@ -17,7 +17,7 @@ def embedchain_bot(db_path, api_key): "llm": { "provider": "openai", "config": { - "model": "gpt-3.5-turbo-1106", + "model": "gpt-4o-mini", "temperature": 0.5, "max_tokens": 1000, "top_p": 1, diff --git a/embedchain/examples/rest-api/sample-config.yaml b/embedchain/examples/rest-api/sample-config.yaml index 1e6d60fe..c7b867e4 100644 --- a/embedchain/examples/rest-api/sample-config.yaml +++ b/embedchain/examples/rest-api/sample-config.yaml @@ -5,7 +5,7 @@ app: llm: 
   provider: openai
   config:
-    model: 'gpt-3.5-turbo'
+    model: 'gpt-4o-mini'
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
diff --git a/embedchain/notebooks/openai.ipynb b/embedchain/notebooks/openai.ipynb
index 408a495a..39da4bb3 100644
--- a/embedchain/notebooks/openai.ipynb
+++ b/embedchain/notebooks/openai.ipynb
@@ -80,7 +80,7 @@
    "        \"llm\": {\n",
    "            \"provider\": \"openai\",\n",
    "            \"config\": {\n",
-    "                \"model\": \"gpt-3.5-turbo\",\n",
+    "                \"model\": \"gpt-4o-mini\",\n",
    "                \"temperature\": 0.5,\n",
    "                \"max_tokens\": 1000,\n",
    "                \"top_p\": 1,\n",
diff --git a/embedchain/tests/llm/test_azure_openai.py b/embedchain/tests/llm/test_azure_openai.py
index f5ac3874..605b8f38 100644
--- a/embedchain/tests/llm/test_azure_openai.py
+++ b/embedchain/tests/llm/test_azure_openai.py
@@ -13,7 +13,7 @@ def azure_openai_llm():
     config = BaseLlmConfig(
         deployment_name="azure_deployment",
         temperature=0.7,
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         max_tokens=50,
         system_prompt="System Prompt",
     )
@@ -40,7 +40,7 @@ def test_get_answer(azure_openai_llm):
     mock_chat.assert_called_once_with(
         deployment_name=azure_openai_llm.config.deployment_name,
         openai_api_version="2024-02-01",
-        model_name=azure_openai_llm.config.model or "gpt-3.5-turbo",
+        model_name=azure_openai_llm.config.model or "gpt-4o-mini",
         temperature=azure_openai_llm.config.temperature,
         max_tokens=azure_openai_llm.config.max_tokens,
         streaming=azure_openai_llm.config.stream,
@@ -60,7 +60,7 @@ def test_get_messages(azure_openai_llm):
 
 
 def test_when_no_deployment_name_provided():
-    config = BaseLlmConfig(temperature=0.7, model="gpt-3.5-turbo", max_tokens=50, system_prompt="System Prompt")
+    config = BaseLlmConfig(temperature=0.7, model="gpt-4o-mini", max_tokens=50, system_prompt="System Prompt")
     with pytest.raises(ValueError):
         llm = AzureOpenAILlm(config)
         llm.get_llm_model_answer("Test Prompt")
@@ -70,7 +70,7 @@ def test_with_api_version():
     config = BaseLlmConfig(
         deployment_name="azure_deployment",
         temperature=0.7,
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         max_tokens=50,
         system_prompt="System Prompt",
         api_version="2024-02-01",
@@ -83,7 +83,7 @@ def test_with_api_version():
     mock_chat.assert_called_once_with(
         deployment_name="azure_deployment",
         openai_api_version="2024-02-01",
-        model_name="gpt-3.5-turbo",
+        model_name="gpt-4o-mini",
         temperature=0.7,
         max_tokens=50,
         streaming=False,
@@ -108,7 +108,7 @@ def test_get_llm_model_answer_with_http_client_proxies():
         max_tokens=50,
         stream=False,
         system_prompt="System prompt",
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         http_client_proxies="http://testproxy.mem0.net:8000",
     )
 
@@ -118,7 +118,7 @@ def test_get_llm_model_answer_with_http_client_proxies():
     mock_chat.assert_called_once_with(
         deployment_name="azure_deployment",
         openai_api_version="2024-02-01",
-        model_name="gpt-3.5-turbo",
+        model_name="gpt-4o-mini",
         temperature=0.7,
         max_tokens=50,
         streaming=False,
@@ -144,7 +144,7 @@ def test_get_llm_model_answer_with_http_async_client_proxies():
         max_tokens=50,
         stream=False,
         system_prompt="System prompt",
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         http_async_client_proxies={"http://": "http://testproxy.mem0.net:8000"},
     )
 
@@ -154,7 +154,7 @@ def test_get_llm_model_answer_with_http_async_client_proxies():
     mock_chat.assert_called_once_with(
         deployment_name="azure_deployment",
         openai_api_version="2024-02-01",
-        model_name="gpt-3.5-turbo",
+        model_name="gpt-4o-mini",
         temperature=0.7,
         max_tokens=50,
         streaming=False,
diff --git a/embedchain/tests/llm/test_openai.py b/embedchain/tests/llm/test_openai.py
index c8f4c670..f38281bb 100644
--- a/embedchain/tests/llm/test_openai.py
+++ b/embedchain/tests/llm/test_openai.py
@@ -24,7 +24,7 @@ def config(env_config):
         top_p=0.8,
         stream=False,
         system_prompt="System prompt",
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         http_client_proxies=None,
         http_async_client_proxies=None,
     )
@@ -211,7 +211,7 @@ def test_get_llm_model_answer_with_http_client_proxies(env_config, mocker):
         top_p=0.8,
         stream=False,
         system_prompt="System prompt",
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         http_client_proxies="http://testproxy.mem0.net:8000",
     )
 
@@ -246,7 +246,7 @@ def test_get_llm_model_answer_with_http_async_client_proxies(env_config, mocker)
         top_p=0.8,
         stream=False,
         system_prompt="System prompt",
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         http_async_client_proxies={"http://": "http://testproxy.mem0.net:8000"},
     )
 
diff --git a/mem0/llms/litellm.py b/mem0/llms/litellm.py
index cfdcba34..bfe95130 100644
--- a/mem0/llms/litellm.py
+++ b/mem0/llms/litellm.py
@@ -15,7 +15,7 @@ class LiteLLM(LLMBase):
         super().__init__(config)
 
         if not self.config.model:
-            self.config.model = "gpt-4o"
+            self.config.model = "gpt-4o-mini"
 
     def _parse_response(self, response, tools):
         """
diff --git a/tests/test_proxy.py b/tests/test_proxy.py
index c7776b84..8e7e58ec 100644
--- a/tests/test_proxy.py
+++ b/tests/test_proxy.py
@@ -66,7 +66,7 @@ def test_completions_create(mock_memory_client, mock_litellm):
     mock_litellm.completion.return_value = {"choices": [{"message": {"content": "I'm doing well, thank you!"}}]}
 
     response = completions.create(
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         messages=messages,
         user_id="test_user",
         temperature=0.7
@@ -77,7 +77,7 @@ def test_completions_create(mock_memory_client, mock_litellm):
     mock_litellm.completion.assert_called_once()
     call_args = mock_litellm.completion.call_args[1]
 
-    assert call_args['model'] == "gpt-3.5-turbo"
+    assert call_args['model'] == "gpt-4o-mini"
     assert len(call_args['messages']) == 2
     assert call_args['temperature'] == 0.7
 
@@ -95,7 +95,7 @@ def test_completions_create_with_system_message(mock_memory_client, mock_litellm
     mock_litellm.completion.return_value = {"choices": [{"message": {"content": "I'm doing well, thank you!"}}]}
 
     completions.create(
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         messages=messages,
         user_id="test_user"
     )
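For context, a minimal sketch of how the updated default is exercised, assuming the embedchain `App.from_config` and `app.chat` APIs that appear in the documentation hunks above (the inline `config` dict mirrors the YAML/JSON examples in this patch and requires `OPENAI_API_KEY` to be set; this is illustrative, not part of the patch itself):

```python
from embedchain import App

# Config mirrors the examples updated in this patch; if the "model" key were
# omitted, OpenAILlm would now fall back to "gpt-4o-mini".
app = App.from_config(config={
    "llm": {
        "provider": "openai",
        "config": {
            "model": "gpt-4o-mini",
            "temperature": 0.5,
            "max_tokens": 1000,
            "top_p": 1,
        },
    }
})

app.add("https://www.forbes.com/profile/elon-musk")
response = app.chat("Which companies did Elon Musk found?")
print(response)
```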