diff --git a/docs/components/llms/models/litellm.mdx b/docs/components/llms/models/litellm.mdx
index c0db85cd..4a6f47cd 100644
--- a/docs/components/llms/models/litellm.mdx
+++ b/docs/components/llms/models/litellm.mdx
@@ -12,7 +12,7 @@ config = {
     "llm": {
         "provider": "litellm",
         "config": {
-            "model": "gpt-3.5-turbo",
+            "model": "gpt-4o-mini",
             "temperature": 0.2,
             "max_tokens": 1500,
         }
diff --git a/embedchain/configs/chroma.yaml b/embedchain/configs/chroma.yaml
index 3d1e4359..142eb05f 100644
--- a/embedchain/configs/chroma.yaml
+++ b/embedchain/configs/chroma.yaml
@@ -5,7 +5,7 @@ app:
 llm:
   provider: openai
   config:
-    model: 'gpt-3.5-turbo'
+    model: 'gpt-4o-mini'
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
diff --git a/embedchain/configs/full-stack.yaml b/embedchain/configs/full-stack.yaml
index dc337957..978722ea 100644
--- a/embedchain/configs/full-stack.yaml
+++ b/embedchain/configs/full-stack.yaml
@@ -10,7 +10,7 @@ chunker:
 llm:
   provider: openai
   config:
-    model: 'gpt-3.5-turbo'
+    model: 'gpt-4o-mini'
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
diff --git a/embedchain/configs/opensearch.yaml b/embedchain/configs/opensearch.yaml
index 4918775e..94a27b29 100644
--- a/embedchain/configs/opensearch.yaml
+++ b/embedchain/configs/opensearch.yaml
@@ -8,7 +8,7 @@ app:
 llm:
   provider: openai
   config:
-    model: 'gpt-3.5-turbo'
+    model: 'gpt-4o-mini'
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
diff --git a/embedchain/docs/api-reference/advanced/configuration.mdx b/embedchain/docs/api-reference/advanced/configuration.mdx
index 2949ec45..83896358 100644
--- a/embedchain/docs/api-reference/advanced/configuration.mdx
+++ b/embedchain/docs/api-reference/advanced/configuration.mdx
@@ -20,7 +20,7 @@ app:
 llm:
   provider: openai
   config:
-    model: 'gpt-3.5-turbo'
+    model: 'gpt-4o-mini'
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
@@ -82,7 +82,7 @@ cache:
   "llm": {
     "provider": "openai",
     "config": {
-      "model": "gpt-3.5-turbo",
+      "model": "gpt-4o-mini",
       "temperature": 0.5,
       "max_tokens": 1000,
       "top_p": 1,
@@ -140,7 +140,7 @@ config = {
   'llm': {
     'provider': 'openai',
     'config': {
-      'model': 'gpt-3.5-turbo',
+      'model': 'gpt-4o-mini',
       'temperature': 0.5,
       'max_tokens': 1000,
       'top_p': 1,
@@ -206,7 +206,7 @@ Alright, let's dive into what each key means in the yaml config above:
 2. `llm` Section:
   - `provider` (String): The provider for the language model, which is set to 'openai'. You can find the full list of llm providers in [our docs](/components/llms).
   - `config`:
-    - `model` (String): The specific model being used, 'gpt-3.5-turbo'.
+    - `model` (String): The specific model being used, 'gpt-4o-mini'.
     - `temperature` (Float): Controls the randomness of the model's output. A higher value (closer to 1) makes the output more random.
     - `max_tokens` (Integer): Controls how many tokens are used in the response.
     - `top_p` (Float): Controls the diversity of word selection. A higher value (closer to 1) makes word selection more diverse.
diff --git a/embedchain/docs/components/llms.mdx b/embedchain/docs/components/llms.mdx
index c2cae52c..c0034dc1 100644
--- a/embedchain/docs/components/llms.mdx
+++ b/embedchain/docs/components/llms.mdx
@@ -62,7 +62,7 @@ app = App.from_config(config_path="config.yaml")
 llm:
   provider: openai
   config:
-    model: 'gpt-3.5-turbo'
+    model: 'gpt-4o-mini'
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
@@ -205,7 +205,7 @@ app = App.from_config(config_path="config.yaml")
 llm:
   provider: azure_openai
   config:
-    model: gpt-3.5-turbo
+    model: gpt-4o-mini
     deployment_name: your_llm_deployment_name
     temperature: 0.5
     max_tokens: 1000
@@ -887,7 +887,7 @@ response = app.chat("Which companies did Elon Musk found?")
 llm:
   provider: openai
   config:
-    model: gpt-3.5-turbo
+    model: gpt-4o-mini
     temperature: 0.5
     max_tokens: 1000
     token_usage: true
diff --git a/embedchain/docs/examples/rest-api/create.mdx b/embedchain/docs/examples/rest-api/create.mdx
index 6736bf7a..35863cea 100644
--- a/embedchain/docs/examples/rest-api/create.mdx
+++ b/embedchain/docs/examples/rest-api/create.mdx
@@ -32,7 +32,7 @@ app:
 llm:
   provider: openai
   config:
-    model: "gpt-3.5-turbo"
+    model: "gpt-4o-mini"
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
diff --git a/embedchain/docs/get-started/faq.mdx b/embedchain/docs/get-started/faq.mdx
index fd00258c..3acae267 100644
--- a/embedchain/docs/get-started/faq.mdx
+++ b/embedchain/docs/get-started/faq.mdx
@@ -122,7 +122,7 @@ You can achieve this by setting `stream` to `true` in the config file.
 llm:
   provider: openai
   config:
-    model: 'gpt-3.5-turbo'
+    model: 'gpt-4o-mini'
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
diff --git a/embedchain/embedchain/config/model_prices_and_context_window.json b/embedchain/embedchain/config/model_prices_and_context_window.json
index 1a9b6e4d..744a09c0 100644
--- a/embedchain/embedchain/config/model_prices_and_context_window.json
+++ b/embedchain/embedchain/config/model_prices_and_context_window.json
@@ -1,6 +1,6 @@
 {
   "openai/gpt-4": {
-    "max_tokens": 4096, 
+    "max_tokens": 4096,
     "max_input_tokens": 8192,
     "max_output_tokens": 4096,
     "input_cost_per_token": 0.00003,
@@ -13,6 +13,20 @@
     "input_cost_per_token": 0.000005,
     "output_cost_per_token": 0.000015
   },
+  "gpt-4o-mini": {
+    "max_tokens": 4096,
+    "max_input_tokens": 128000,
+    "max_output_tokens": 4096,
+    "input_cost_per_token": 0.00000015,
+    "output_cost_per_token": 0.00000060
+  },
+  "gpt-4o-mini-2024-07-18": {
+    "max_tokens": 4096,
+    "max_input_tokens": 128000,
+    "max_output_tokens": 4096,
+    "input_cost_per_token": 0.00000015,
+    "output_cost_per_token": 0.00000060
+  },
   "openai/gpt-4o-2024-05-13": {
     "max_tokens": 4096,
     "max_input_tokens": 128000,
@@ -153,7 +167,7 @@
   "openai/text-embedding-ada-002": {
     "max_tokens": 8191,
     "max_input_tokens": 8191,
-    "output_vector_size": 1536, 
+    "output_vector_size": 1536,
     "input_cost_per_token": 0.0000001,
     "output_cost_per_token": 0.000000
   },
@@ -176,7 +190,7 @@
     "max_output_tokens": 4096,
     "input_cost_per_token": 0.000002,
     "output_cost_per_token": 0.000002
-  }, 
+  },
   "openai/gpt-3.5-turbo-instruct": {
     "max_tokens": 4096,
     "max_input_tokens": 8192,
@@ -197,6 +211,13 @@
     "max_output_tokens": 4096,
     "input_cost_per_token": 0.000005,
     "output_cost_per_token": 0.000015
+  },
+  "azure/gpt-4o-mini": {
+    "max_tokens": 4096,
+    "max_input_tokens": 128000,
+    "max_output_tokens": 4096,
+    "input_cost_per_token": 0.00000015,
+    "output_cost_per_token": 0.00000060
   },
   "azure/gpt-4-turbo-2024-04-09": {
     "max_tokens": 4096,
@@ -325,7 +346,7 @@
     "max_input_tokens": 8191,
     "input_cost_per_token": 0.00000002,
"output_cost_per_token": 0.000000 - }, + }, "mistralai/mistral-tiny": { "max_tokens": 8191, "max_input_tokens": 32000, @@ -595,77 +616,77 @@ "max_tokens": 8192, "max_input_tokens": 32760, "max_output_tokens": 8192, - "input_cost_per_token": 0.00000025, + "input_cost_per_token": 0.00000025, "output_cost_per_token": 0.0000005 }, - "vertexai/gemini-1.0-pro": { + "vertexai/gemini-1.0-pro": { "max_tokens": 8192, "max_input_tokens": 32760, "max_output_tokens": 8192, - "input_cost_per_token": 0.00000025, + "input_cost_per_token": 0.00000025, "output_cost_per_token": 0.0000005 }, - "vertexai/gemini-1.0-pro-001": { + "vertexai/gemini-1.0-pro-001": { "max_tokens": 8192, "max_input_tokens": 32760, "max_output_tokens": 8192, - "input_cost_per_token": 0.00000025, + "input_cost_per_token": 0.00000025, "output_cost_per_token": 0.0000005 }, - "vertexai/gemini-1.0-pro-002": { + "vertexai/gemini-1.0-pro-002": { "max_tokens": 8192, "max_input_tokens": 32760, "max_output_tokens": 8192, - "input_cost_per_token": 0.00000025, + "input_cost_per_token": 0.00000025, "output_cost_per_token": 0.0000005 }, - "vertexai/gemini-1.5-pro": { + "vertexai/gemini-1.5-pro": { "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_token": 0.000000625, + "input_cost_per_token": 0.000000625, "output_cost_per_token": 0.000001875 }, "vertexai/gemini-1.5-flash-001": { "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_token": 0, + "input_cost_per_token": 0, "output_cost_per_token": 0 }, "vertexai/gemini-1.5-flash-preview-0514": { "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_token": 0, + "input_cost_per_token": 0, "output_cost_per_token": 0 }, - "vertexai/gemini-1.5-pro-001": { + "vertexai/gemini-1.5-pro-001": { "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_token": 0.000000625, + "input_cost_per_token": 0.000000625, "output_cost_per_token": 0.000001875 }, - "vertexai/gemini-1.5-pro-preview-0514": { + "vertexai/gemini-1.5-pro-preview-0514": { "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_token": 0.000000625, + "input_cost_per_token": 0.000000625, "output_cost_per_token": 0.000001875 }, - "vertexai/gemini-1.5-pro-preview-0215": { + "vertexai/gemini-1.5-pro-preview-0215": { "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_token": 0.000000625, + "input_cost_per_token": 0.000000625, "output_cost_per_token": 0.000001875 }, "vertexai/gemini-1.5-pro-preview-0409": { "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_token": 0.000000625, + "input_cost_per_token": 0.000000625, "output_cost_per_token": 0.000001875 }, "vertexai/gemini-experimental": { @@ -682,7 +703,7 @@ "max_images_per_prompt": 16, "max_videos_per_prompt": 1, "max_video_length": 2, - "input_cost_per_token": 0.00000025, + "input_cost_per_token": 0.00000025, "output_cost_per_token": 0.0000005 }, "vertexai/gemini-1.0-pro-vision": { @@ -692,7 +713,7 @@ "max_images_per_prompt": 16, "max_videos_per_prompt": 1, "max_video_length": 2, - "input_cost_per_token": 0.00000025, + "input_cost_per_token": 0.00000025, "output_cost_per_token": 0.0000005 }, "vertexai/gemini-1.0-pro-vision-001": { @@ -702,7 +723,7 @@ "max_images_per_prompt": 16, "max_videos_per_prompt": 1, "max_video_length": 2, - "input_cost_per_token": 0.00000025, + "input_cost_per_token": 0.00000025, 
"output_cost_per_token": 0.0000005 }, "vertexai/claude-3-sonnet@20240229": { @@ -713,7 +734,7 @@ "output_cost_per_token": 0.000015 }, "vertexai/claude-3-haiku@20240307": { - "max_tokens": 4096, + "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, "input_cost_per_token": 0.00000025, @@ -727,49 +748,49 @@ "output_cost_per_token": 0.000075 }, "cohere/command-r": { - "max_tokens": 4096, + "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.00000050, "output_cost_per_token": 0.0000015 }, "cohere/command-light": { - "max_tokens": 4096, + "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015 }, "cohere/command-r-plus": { - "max_tokens": 4096, + "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000015 }, "cohere/command-nightly": { - "max_tokens": 4096, + "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015 }, "cohere/command": { - "max_tokens": 4096, + "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015 }, "cohere/command-medium-beta": { - "max_tokens": 4096, + "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015 }, "cohere/command-xlarge-beta": { - "max_tokens": 4096, + "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, "input_cost_per_token": 0.000015, diff --git a/embedchain/embedchain/llm/azure_openai.py b/embedchain/embedchain/llm/azure_openai.py index eea4e5f8..c219270a 100644 --- a/embedchain/embedchain/llm/azure_openai.py +++ b/embedchain/embedchain/llm/azure_openai.py @@ -26,7 +26,7 @@ class AzureOpenAILlm(BaseLlm): chat = AzureChatOpenAI( deployment_name=config.deployment_name, openai_api_version=str(config.api_version) if config.api_version else "2024-02-01", - model_name=config.model or "gpt-3.5-turbo", + model_name=config.model or "gpt-4o-mini", temperature=config.temperature, max_tokens=config.max_tokens, streaming=config.stream, diff --git a/embedchain/embedchain/llm/openai.py b/embedchain/embedchain/llm/openai.py index 316854e7..a595ee60 100644 --- a/embedchain/embedchain/llm/openai.py +++ b/embedchain/embedchain/llm/openai.py @@ -52,7 +52,7 @@ class OpenAILlm(BaseLlm): messages.append(SystemMessage(content=config.system_prompt)) messages.append(HumanMessage(content=prompt)) kwargs = { - "model": config.model or "gpt-3.5-turbo", + "model": config.model or "gpt-4o-mini", "temperature": config.temperature, "max_tokens": config.max_tokens, "model_kwargs": config.model_kwargs or {}, diff --git a/embedchain/examples/chat-pdf/app.py b/embedchain/examples/chat-pdf/app.py index fb29b73e..73800605 100644 --- a/embedchain/examples/chat-pdf/app.py +++ b/embedchain/examples/chat-pdf/app.py @@ -17,7 +17,7 @@ def embedchain_bot(db_path, api_key): "llm": { "provider": "openai", "config": { - "model": "gpt-3.5-turbo-1106", + "model": "gpt-4o-mini", "temperature": 0.5, "max_tokens": 1000, "top_p": 1, diff --git a/embedchain/examples/rest-api/sample-config.yaml b/embedchain/examples/rest-api/sample-config.yaml index 1e6d60fe..c7b867e4 100644 --- a/embedchain/examples/rest-api/sample-config.yaml +++ b/embedchain/examples/rest-api/sample-config.yaml @@ -5,7 +5,7 @@ app: llm: 
   provider: openai
   config:
-    model: 'gpt-3.5-turbo'
+    model: 'gpt-4o-mini'
     temperature: 0.5
     max_tokens: 1000
     top_p: 1
diff --git a/embedchain/notebooks/openai.ipynb b/embedchain/notebooks/openai.ipynb
index 408a495a..39da4bb3 100644
--- a/embedchain/notebooks/openai.ipynb
+++ b/embedchain/notebooks/openai.ipynb
@@ -80,7 +80,7 @@
    "        \"llm\": {\n",
    "            \"provider\": \"openai\",\n",
    "            \"config\": {\n",
-    "                \"model\": \"gpt-3.5-turbo\",\n",
+    "                \"model\": \"gpt-4o-mini\",\n",
    "                \"temperature\": 0.5,\n",
    "                \"max_tokens\": 1000,\n",
    "                \"top_p\": 1,\n",
diff --git a/embedchain/tests/llm/test_azure_openai.py b/embedchain/tests/llm/test_azure_openai.py
index f5ac3874..605b8f38 100644
--- a/embedchain/tests/llm/test_azure_openai.py
+++ b/embedchain/tests/llm/test_azure_openai.py
@@ -13,7 +13,7 @@ def azure_openai_llm():
     config = BaseLlmConfig(
         deployment_name="azure_deployment",
         temperature=0.7,
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         max_tokens=50,
         system_prompt="System Prompt",
     )
@@ -40,7 +40,7 @@ def test_get_answer(azure_openai_llm):
     mock_chat.assert_called_once_with(
         deployment_name=azure_openai_llm.config.deployment_name,
         openai_api_version="2024-02-01",
-        model_name=azure_openai_llm.config.model or "gpt-3.5-turbo",
+        model_name=azure_openai_llm.config.model or "gpt-4o-mini",
         temperature=azure_openai_llm.config.temperature,
         max_tokens=azure_openai_llm.config.max_tokens,
         streaming=azure_openai_llm.config.stream,
@@ -60,7 +60,7 @@ def test_get_messages(azure_openai_llm):
 
 
 def test_when_no_deployment_name_provided():
-    config = BaseLlmConfig(temperature=0.7, model="gpt-3.5-turbo", max_tokens=50, system_prompt="System Prompt")
+    config = BaseLlmConfig(temperature=0.7, model="gpt-4o-mini", max_tokens=50, system_prompt="System Prompt")
     with pytest.raises(ValueError):
         llm = AzureOpenAILlm(config)
         llm.get_llm_model_answer("Test Prompt")
@@ -70,7 +70,7 @@ def test_with_api_version():
     config = BaseLlmConfig(
         deployment_name="azure_deployment",
         temperature=0.7,
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         max_tokens=50,
         system_prompt="System Prompt",
         api_version="2024-02-01",
@@ -83,7 +83,7 @@ def test_with_api_version():
     mock_chat.assert_called_once_with(
         deployment_name="azure_deployment",
         openai_api_version="2024-02-01",
-        model_name="gpt-3.5-turbo",
+        model_name="gpt-4o-mini",
         temperature=0.7,
         max_tokens=50,
         streaming=False,
@@ -108,7 +108,7 @@ def test_get_llm_model_answer_with_http_client_proxies():
         max_tokens=50,
         stream=False,
         system_prompt="System prompt",
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         http_client_proxies="http://testproxy.mem0.net:8000",
     )
 
@@ -118,7 +118,7 @@ def test_get_llm_model_answer_with_http_client_proxies():
     mock_chat.assert_called_once_with(
         deployment_name="azure_deployment",
         openai_api_version="2024-02-01",
-        model_name="gpt-3.5-turbo",
+        model_name="gpt-4o-mini",
         temperature=0.7,
         max_tokens=50,
         streaming=False,
@@ -144,7 +144,7 @@ def test_get_llm_model_answer_with_http_async_client_proxies():
         max_tokens=50,
         stream=False,
         system_prompt="System prompt",
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         http_async_client_proxies={"http://": "http://testproxy.mem0.net:8000"},
     )
 
@@ -154,7 +154,7 @@ def test_get_llm_model_answer_with_http_async_client_proxies():
     mock_chat.assert_called_once_with(
         deployment_name="azure_deployment",
         openai_api_version="2024-02-01",
-        model_name="gpt-3.5-turbo",
+        model_name="gpt-4o-mini",
         temperature=0.7,
         max_tokens=50,
         streaming=False,
diff --git a/embedchain/tests/llm/test_openai.py b/embedchain/tests/llm/test_openai.py
index c8f4c670..f38281bb 100644
--- a/embedchain/tests/llm/test_openai.py
+++ b/embedchain/tests/llm/test_openai.py
@@ -24,7 +24,7 @@ def config(env_config):
         top_p=0.8,
         stream=False,
         system_prompt="System prompt",
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         http_client_proxies=None,
         http_async_client_proxies=None,
     )
@@ -211,7 +211,7 @@ def test_get_llm_model_answer_with_http_client_proxies(env_config, mocker):
         top_p=0.8,
         stream=False,
         system_prompt="System prompt",
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         http_client_proxies="http://testproxy.mem0.net:8000",
     )
 
@@ -246,7 +246,7 @@ def test_get_llm_model_answer_with_http_async_client_proxies(env_config, mocker)
         top_p=0.8,
         stream=False,
         system_prompt="System prompt",
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         http_async_client_proxies={"http://": "http://testproxy.mem0.net:8000"},
     )
 
diff --git a/mem0/llms/litellm.py b/mem0/llms/litellm.py
index cfdcba34..bfe95130 100644
--- a/mem0/llms/litellm.py
+++ b/mem0/llms/litellm.py
@@ -15,7 +15,7 @@ class LiteLLM(LLMBase):
         super().__init__(config)
 
         if not self.config.model:
-            self.config.model = "gpt-4o"
+            self.config.model = "gpt-4o-mini"
 
     def _parse_response(self, response, tools):
         """
diff --git a/tests/test_proxy.py b/tests/test_proxy.py
index c7776b84..8e7e58ec 100644
--- a/tests/test_proxy.py
+++ b/tests/test_proxy.py
@@ -66,7 +66,7 @@ def test_completions_create(mock_memory_client, mock_litellm):
     mock_litellm.completion.return_value = {"choices": [{"message": {"content": "I'm doing well, thank you!"}}]}
 
     response = completions.create(
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         messages=messages,
         user_id="test_user",
         temperature=0.7
@@ -77,7 +77,7 @@ def test_completions_create(mock_memory_client, mock_litellm):
     mock_litellm.completion.assert_called_once()
     call_args = mock_litellm.completion.call_args[1]
 
-    assert call_args['model'] == "gpt-3.5-turbo"
+    assert call_args['model'] == "gpt-4o-mini"
     assert len(call_args['messages']) == 2
     assert call_args['temperature'] == 0.7
 
@@ -95,7 +95,7 @@ def test_completions_create_with_system_message(mock_memory_client, mock_litellm
     mock_litellm.completion.return_value = {"choices": [{"message": {"content": "I'm doing well, thank you!"}}]}
 
     completions.create(
-        model="gpt-3.5-turbo",
+        model="gpt-4o-mini",
         messages=messages,
         user_id="test_user"
     )
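For context, a minimal sketch of how the updated default is exercised, assuming the embedchain `App.from_config` and `app.chat` APIs that appear in the documentation hunks above (the inline `config` dict mirrors the YAML/JSON examples in this patch and requires `OPENAI_API_KEY` to be set; this is illustrative, not part of the patch itself):

```python
from embedchain import App

# Config mirrors the examples updated in this patch; if the "model" key were
# omitted, OpenAILlm would now fall back to "gpt-4o-mini".
app = App.from_config(config={
    "llm": {
        "provider": "openai",
        "config": {
            "model": "gpt-4o-mini",
            "temperature": 0.5,
            "max_tokens": 1000,
            "top_p": 1,
        },
    }
})

app.add("https://www.forbes.com/profile/elon-musk")
response = app.chat("Which companies did Elon Musk found?")
print(response)
```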