Remove tools from LLMs (#2363)

2025-03-14 17:42:48 +05:30
parent 4be426f762
commit ee80a43810
21 changed files with 418 additions and 1071 deletions
--- a/tests/llms/test_azure_openai.py
+++ b/tests/llms/test_azure_openai.py
@@ -20,8 +20,10 @@ def mock_openai_client():
        yield mock_client


-def test_generate_response_without_tools(mock_openai_client):
-    config = BaseLlmConfig(model=MODEL, temperature=TEMPERATURE, max_tokens=MAX_TOKENS, top_p=TOP_P)
+def test_generate_response(mock_openai_client):
+    config = BaseLlmConfig(
+        model=MODEL, temperature=TEMPERATURE, max_tokens=MAX_TOKENS, top_p=TOP_P
+    )
    llm = AzureOpenAILLM(config)
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
@@ -29,67 +31,21 @@ def test_generate_response_without_tools(mock_openai_client):
    ]

    mock_response = Mock()
-    mock_response.choices = [Mock(message=Mock(content="I'm doing well, thank you for asking!"))]
+    mock_response.choices = [
+        Mock(message=Mock(content="I'm doing well, thank you for asking!"))
+    ]
    mock_openai_client.chat.completions.create.return_value = mock_response

    response = llm.generate_response(messages)

-    mock_openai_client.chat.completions.create.assert_called_once_with(
-        model=MODEL, messages=messages, temperature=TEMPERATURE, max_tokens=MAX_TOKENS, top_p=TOP_P
-    )
-    assert response == "I'm doing well, thank you for asking!"
-
-
-def test_generate_response_with_tools(mock_openai_client):
-    config = BaseLlmConfig(model=MODEL, temperature=TEMPERATURE, max_tokens=MAX_TOKENS, top_p=TOP_P)
-    llm = AzureOpenAILLM(config)
-    messages = [
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": "Add a new memory: Today is a sunny day."},
-    ]
-    tools = [
-        {
-            "type": "function",
-            "function": {
-                "name": "add_memory",
-                "description": "Add a memory",
-                "parameters": {
-                    "type": "object",
-                    "properties": {"data": {"type": "string", "description": "Data to add to memory"}},
-                    "required": ["data"],
-                },
-            },
-        }
-    ]
-
-    mock_response = Mock()
-    mock_message = Mock()
-    mock_message.content = "I've added the memory for you."
-
-    mock_tool_call = Mock()
-    mock_tool_call.function.name = "add_memory"
-    mock_tool_call.function.arguments = '{"data": "Today is a sunny day."}'
-
-    mock_message.tool_calls = [mock_tool_call]
-    mock_response.choices = [Mock(message=mock_message)]
-    mock_openai_client.chat.completions.create.return_value = mock_response
-
-    response = llm.generate_response(messages, tools=tools)
-
    mock_openai_client.chat.completions.create.assert_called_once_with(
        model=MODEL,
        messages=messages,
        temperature=TEMPERATURE,
        max_tokens=MAX_TOKENS,
        top_p=TOP_P,
-        tools=tools,
-        tool_choice="auto",
    )
-
-    assert response["content"] == "I've added the memory for you."
-    assert len(response["tool_calls"]) == 1
-    assert response["tool_calls"][0]["name"] == "add_memory"
-    assert response["tool_calls"][0]["arguments"] == {"data": "Today is a sunny day."}
+    assert response == "I'm doing well, thank you for asking!"


@pytest.mark.parametrize(
@@ -128,4 +84,6 @@ def test_generate_with_http_proxies(default_headers):
            api_version=None,
            default_headers=default_headers,
        )
-        mock_http_client.assert_called_once_with(proxies="http://testproxy.mem0.net:8000")
+        mock_http_client.assert_called_once_with(
+            proxies="http://testproxy.mem0.net:8000"
+        )
--- a/tests/llms/test_deepseek.py
+++ b/tests/llms/test_deepseek.py
@@ -16,33 +16,47 @@ def mock_deepseek_client():

 def test_deepseek_llm_base_url():
    # case1: default config with deepseek official base url
-    config = BaseLlmConfig(model="deepseek-chat", temperature=0.7, max_tokens=100, top_p=1.0, api_key="api_key")
+    config = BaseLlmConfig(
+        model="deepseek-chat",
+        temperature=0.7,
+        max_tokens=100,
+        top_p=1.0,
+        api_key="api_key",
+    )
    llm = DeepSeekLLM(config)
    assert str(llm.client.base_url) == "https://api.deepseek.com"

    # case2: with env variable DEEPSEEK_API_BASE
    provider_base_url = "https://api.provider.com/v1/"
    os.environ["DEEPSEEK_API_BASE"] = provider_base_url
-    config = BaseLlmConfig(model="deepseek-chat", temperature=0.7, max_tokens=100, top_p=1.0, api_key="api_key")
+    config = BaseLlmConfig(
+        model="deepseek-chat",
+        temperature=0.7,
+        max_tokens=100,
+        top_p=1.0,
+        api_key="api_key",
+    )
    llm = DeepSeekLLM(config)
    assert str(llm.client.base_url) == provider_base_url

    # case3: with config.deepseek_base_url
    config_base_url = "https://api.config.com/v1/"
    config = BaseLlmConfig(
-        model="deepseek-chat", 
-        temperature=0.7, 
-        max_tokens=100, 
-        top_p=1.0, 
-        api_key="api_key", 
-        deepseek_base_url=config_base_url
+        model="deepseek-chat",
+        temperature=0.7,
+        max_tokens=100,
+        top_p=1.0,
+        api_key="api_key",
+        deepseek_base_url=config_base_url,
    )
    llm = DeepSeekLLM(config)
    assert str(llm.client.base_url) == config_base_url


-def test_generate_response_without_tools(mock_deepseek_client):
-    config = BaseLlmConfig(model="deepseek-chat", temperature=0.7, max_tokens=100, top_p=1.0)
+def test_generate_response(mock_deepseek_client):
+    config = BaseLlmConfig(
+        model="deepseek-chat", temperature=0.7, max_tokens=100, top_p=1.0
+    )
    llm = DeepSeekLLM(config)
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
@@ -50,64 +64,18 @@ def test_generate_response_without_tools(mock_deepseek_client):
    ]

    mock_response = Mock()
-    mock_response.choices = [Mock(message=Mock(content="I'm doing well, thank you for asking!"))]
+    mock_response.choices = [
+        Mock(message=Mock(content="I'm doing well, thank you for asking!"))
+    ]
    mock_deepseek_client.chat.completions.create.return_value = mock_response

    response = llm.generate_response(messages)

    mock_deepseek_client.chat.completions.create.assert_called_once_with(
-        model="deepseek-chat", messages=messages, temperature=0.7, max_tokens=100, top_p=1.0
+        model="deepseek-chat",
+        messages=messages,
+        temperature=0.7,
+        max_tokens=100,
+        top_p=1.0,
    )
    assert response == "I'm doing well, thank you for asking!"
-
-
-def test_generate_response_with_tools(mock_deepseek_client):
-    config = BaseLlmConfig(model="deepseek-chat", temperature=0.7, max_tokens=100, top_p=1.0)
-    llm = DeepSeekLLM(config)
-    messages = [
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": "Add a new memory: Today is a sunny day."},
-    ]
-    tools = [
-        {
-            "type": "function",
-            "function": {
-                "name": "add_memory",
-                "description": "Add a memory",
-                "parameters": {
-                    "type": "object",
-                    "properties": {"data": {"type": "string", "description": "Data to add to memory"}},
-                    "required": ["data"],
-                },
-            },
-        }
-    ]
-
-    mock_response = Mock()
-    mock_message = Mock()
-    mock_message.content = "I've added the memory for you."
-
-    mock_tool_call = Mock()
-    mock_tool_call.function.name = "add_memory"
-    mock_tool_call.function.arguments = '{"data": "Today is a sunny day."}'
-
-    mock_message.tool_calls = [mock_tool_call]
-    mock_response.choices = [Mock(message=mock_message)]
-    mock_deepseek_client.chat.completions.create.return_value = mock_response
-
-    response = llm.generate_response(messages, tools=tools)
-
-    mock_deepseek_client.chat.completions.create.assert_called_once_with(
-        model="deepseek-chat", 
-        messages=messages, 
-        temperature=0.7, 
-        max_tokens=100, 
-        top_p=1.0, 
-        tools=tools, 
-        tool_choice="auto"
-    )
-
-    assert response["content"] == "I've added the memory for you."
-    assert len(response["tool_calls"]) == 1
-    assert response["tool_calls"][0]["name"] == "add_memory"
-    assert response["tool_calls"][0]["arguments"] == {"data": "Today is a sunny day."}
--- a/tests/llms/test_gemini_llm.py
+++ b/tests/llms/test_gemini_llm.py
@@ -17,7 +17,9 @@ def mock_gemini_client():


 def test_generate_response_without_tools(mock_gemini_client: Mock):
-    config = BaseLlmConfig(model="gemini-1.5-flash-latest", temperature=0.7, max_tokens=100, top_p=1.0)
+    config = BaseLlmConfig(
+        model="gemini-1.5-flash-latest", temperature=0.7, max_tokens=100, top_p=1.0
+    )
    llm = GeminiLLM(config)
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
@@ -34,86 +36,14 @@ def test_generate_response_without_tools(mock_gemini_client: Mock):

    mock_gemini_client.generate_content.assert_called_once_with(
        contents=[
-            {"parts": "THIS IS A SYSTEM PROMPT. YOU MUST OBEY THIS: You are a helpful assistant.", "role": "user"},
+            {
+                "parts": "THIS IS A SYSTEM PROMPT. YOU MUST OBEY THIS: You are a helpful assistant.",
+                "role": "user",
+            },
            {"parts": "Hello, how are you?", "role": "user"},
        ],
-        generation_config=GenerationConfig(temperature=0.7, max_output_tokens=100, top_p=1.0),
-        tools=None,
-        tool_config=content_types.to_tool_config(
-            {"function_calling_config": {"mode": "auto", "allowed_function_names": None}}
+        generation_config=GenerationConfig(
+            temperature=0.7, max_output_tokens=100, top_p=1.0
        ),
    )
    assert response == "I'm doing well, thank you for asking!"
-
-
-def test_generate_response_with_tools(mock_gemini_client: Mock):
-    config = BaseLlmConfig(model="gemini-1.5-flash-latest", temperature=0.7, max_tokens=100, top_p=1.0)
-    llm = GeminiLLM(config)
-    messages = [
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": "Add a new memory: Today is a sunny day."},
-    ]
-    tools = [
-        {
-            "type": "function",
-            "function": {
-                "name": "add_memory",
-                "description": "Add a memory",
-                "parameters": {
-                    "type": "object",
-                    "properties": {"data": {"type": "string", "description": "Data to add to memory"}},
-                    "required": ["data"],
-                },
-            },
-        }
-    ]
-
-    mock_tool_call = Mock()
-    mock_tool_call.name = "add_memory"
-    mock_tool_call.args = {"data": "Today is a sunny day."}
-
-    mock_part = Mock()
-    mock_part.function_call = mock_tool_call
-    mock_part.text = "I've added the memory for you."
-
-    mock_content = Mock()
-    mock_content.parts = [mock_part]
-
-    mock_message = Mock()
-    mock_message.content = mock_content
-
-    mock_response = Mock(candidates=[mock_message])
-    mock_gemini_client.generate_content.return_value = mock_response
-
-    response = llm.generate_response(messages, tools=tools)
-
-    mock_gemini_client.generate_content.assert_called_once_with(
-        contents=[
-            {"parts": "THIS IS A SYSTEM PROMPT. YOU MUST OBEY THIS: You are a helpful assistant.", "role": "user"},
-            {"parts": "Add a new memory: Today is a sunny day.", "role": "user"},
-        ],
-        generation_config=GenerationConfig(temperature=0.7, max_output_tokens=100, top_p=1.0),
-        tools=[
-            {
-                "function_declarations": [
-                    {
-                        "name": "add_memory",
-                        "description": "Add a memory",
-                        "parameters": {
-                            "type": "object",
-                            "properties": {"data": {"type": "string", "description": "Data to add to memory"}},
-                            "required": ["data"],
-                        },
-                    }
-                ]
-            }
-        ],
-        tool_config=content_types.to_tool_config(
-            {"function_calling_config": {"mode": "auto", "allowed_function_names": None}}
-        ),
-    )
-
-    assert response["content"] == "I've added the memory for you."
-    assert len(response["tool_calls"]) == 1
-    assert response["tool_calls"][0]["name"] == "add_memory"
-    assert response["tool_calls"][0]["arguments"] == {"data": "Today is a sunny day."}
--- a/tests/llms/test_groq.py
+++ b/tests/llms/test_groq.py
@@ -14,8 +14,10 @@ def mock_groq_client():
        yield mock_client


-def test_generate_response_without_tools(mock_groq_client):
-    config = BaseLlmConfig(model="llama3-70b-8192", temperature=0.7, max_tokens=100, top_p=1.0)
+def test_generate_response(mock_groq_client):
+    config = BaseLlmConfig(
+        model="llama3-70b-8192", temperature=0.7, max_tokens=100, top_p=1.0
+    )
    llm = GroqLLM(config)
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
@@ -23,64 +25,18 @@ def test_generate_response_without_tools(mock_groq_client):
    ]

    mock_response = Mock()
-    mock_response.choices = [Mock(message=Mock(content="I'm doing well, thank you for asking!"))]
+    mock_response.choices = [
+        Mock(message=Mock(content="I'm doing well, thank you for asking!"))
+    ]
    mock_groq_client.chat.completions.create.return_value = mock_response

    response = llm.generate_response(messages)

-    mock_groq_client.chat.completions.create.assert_called_once_with(
-        model="llama3-70b-8192", messages=messages, temperature=0.7, max_tokens=100, top_p=1.0
-    )
-    assert response == "I'm doing well, thank you for asking!"
-
-
-def test_generate_response_with_tools(mock_groq_client):
-    config = BaseLlmConfig(model="llama3-70b-8192", temperature=0.7, max_tokens=100, top_p=1.0)
-    llm = GroqLLM(config)
-    messages = [
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": "Add a new memory: Today is a sunny day."},
-    ]
-    tools = [
-        {
-            "type": "function",
-            "function": {
-                "name": "add_memory",
-                "description": "Add a memory",
-                "parameters": {
-                    "type": "object",
-                    "properties": {"data": {"type": "string", "description": "Data to add to memory"}},
-                    "required": ["data"],
-                },
-            },
-        }
-    ]
-
-    mock_response = Mock()
-    mock_message = Mock()
-    mock_message.content = "I've added the memory for you."
-
-    mock_tool_call = Mock()
-    mock_tool_call.function.name = "add_memory"
-    mock_tool_call.function.arguments = '{"data": "Today is a sunny day."}'
-
-    mock_message.tool_calls = [mock_tool_call]
-    mock_response.choices = [Mock(message=mock_message)]
-    mock_groq_client.chat.completions.create.return_value = mock_response
-
-    response = llm.generate_response(messages, tools=tools)
-
    mock_groq_client.chat.completions.create.assert_called_once_with(
        model="llama3-70b-8192",
        messages=messages,
        temperature=0.7,
        max_tokens=100,
        top_p=1.0,
-        tools=tools,
-        tool_choice="auto",
    )
-
-    assert response["content"] == "I've added the memory for you."
-    assert len(response["tool_calls"]) == 1
-    assert response["tool_calls"][0]["name"] == "add_memory"
-    assert response["tool_calls"][0]["arguments"] == {"data": "Today is a sunny day."}
+    assert response == "I'm doing well, thank you for asking!"
--- a/tests/llms/test_litellm.py
+++ b/tests/llms/test_litellm.py
@@ -13,17 +13,22 @@ def mock_litellm():


 def test_generate_response_with_unsupported_model(mock_litellm):
-    config = BaseLlmConfig(model="unsupported-model", temperature=0.7, max_tokens=100, top_p=1)
+    config = BaseLlmConfig(
+        model="unsupported-model", temperature=0.7, max_tokens=100, top_p=1
+    )
    llm = litellm.LiteLLM(config)
    messages = [{"role": "user", "content": "Hello"}]

    mock_litellm.supports_function_calling.return_value = False

-    with pytest.raises(ValueError, match="Model 'unsupported-model' in litellm does not support function calling."):
+    with pytest.raises(
+        ValueError,
+        match="Model 'unsupported-model' in LiteLLM does not support function calling.",
+    ):
        llm.generate_response(messages)


-def test_generate_response_without_tools(mock_litellm):
+def test_generate_response(mock_litellm):
    config = BaseLlmConfig(model="gpt-4o", temperature=0.7, max_tokens=100, top_p=1)
    llm = litellm.LiteLLM(config)
    messages = [
@@ -32,7 +37,9 @@ def test_generate_response_without_tools(mock_litellm):
    ]

    mock_response = Mock()
-    mock_response.choices = [Mock(message=Mock(content="I'm doing well, thank you for asking!"))]
+    mock_response.choices = [
+        Mock(message=Mock(content="I'm doing well, thank you for asking!"))
+    ]
    mock_litellm.completion.return_value = mock_response
    mock_litellm.supports_function_calling.return_value = True

@@ -42,50 +49,3 @@ def test_generate_response_without_tools(mock_litellm):
        model="gpt-4o", messages=messages, temperature=0.7, max_tokens=100, top_p=1.0
    )
    assert response == "I'm doing well, thank you for asking!"
-
-
-def test_generate_response_with_tools(mock_litellm):
-    config = BaseLlmConfig(model="gpt-4o", temperature=0.7, max_tokens=100, top_p=1)
-    llm = litellm.LiteLLM(config)
-    messages = [
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": "Add a new memory: Today is a sunny day."},
-    ]
-    tools = [
-        {
-            "type": "function",
-            "function": {
-                "name": "add_memory",
-                "description": "Add a memory",
-                "parameters": {
-                    "type": "object",
-                    "properties": {"data": {"type": "string", "description": "Data to add to memory"}},
-                    "required": ["data"],
-                },
-            },
-        }
-    ]
-
-    mock_response = Mock()
-    mock_message = Mock()
-    mock_message.content = "I've added the memory for you."
-
-    mock_tool_call = Mock()
-    mock_tool_call.function.name = "add_memory"
-    mock_tool_call.function.arguments = '{"data": "Today is a sunny day."}'
-
-    mock_message.tool_calls = [mock_tool_call]
-    mock_response.choices = [Mock(message=mock_message)]
-    mock_litellm.completion.return_value = mock_response
-    mock_litellm.supports_function_calling.return_value = True
-
-    response = llm.generate_response(messages, tools=tools)
-
-    mock_litellm.completion.assert_called_once_with(
-        model="gpt-4o", messages=messages, temperature=0.7, max_tokens=100, top_p=1, tools=tools, tool_choice="auto"
-    )
-
-    assert response["content"] == "I've added the memory for you."
-    assert len(response["tool_calls"]) == 1
-    assert response["tool_calls"][0]["name"] == "add_memory"
-    assert response["tool_calls"][0]["arguments"] == {"data": "Today is a sunny day."}
--- a/tests/llms/test_openai.py
+++ b/tests/llms/test_openai.py
@@ -16,7 +16,9 @@ def mock_openai_client():

 def test_openai_llm_base_url():
    # case1: default config: with openai official base url
-    config = BaseLlmConfig(model="gpt-4o", temperature=0.7, max_tokens=100, top_p=1.0, api_key="api_key")
+    config = BaseLlmConfig(
+        model="gpt-4o", temperature=0.7, max_tokens=100, top_p=1.0, api_key="api_key"
+    )
    llm = OpenAILLM(config)
    # Note: openai client will parse the raw base_url into a URL object, which will have a trailing slash
    assert str(llm.client.base_url) == "https://api.openai.com/v1/"
@@ -24,7 +26,9 @@ def test_openai_llm_base_url():
    # case2: with env variable OPENAI_API_BASE
    provider_base_url = "https://api.provider.com/v1"
    os.environ["OPENAI_API_BASE"] = provider_base_url
-    config = BaseLlmConfig(model="gpt-4o", temperature=0.7, max_tokens=100, top_p=1.0, api_key="api_key")
+    config = BaseLlmConfig(
+        model="gpt-4o", temperature=0.7, max_tokens=100, top_p=1.0, api_key="api_key"
+    )
    llm = OpenAILLM(config)
    # Note: openai client will parse the raw base_url into a URL object, which will have a trailing slash
    assert str(llm.client.base_url) == provider_base_url + "/"
@@ -32,14 +36,19 @@ def test_openai_llm_base_url():
    # case3: with config.openai_base_url
    config_base_url = "https://api.config.com/v1"
    config = BaseLlmConfig(
-        model="gpt-4o", temperature=0.7, max_tokens=100, top_p=1.0, api_key="api_key", openai_base_url=config_base_url
+        model="gpt-4o",
+        temperature=0.7,
+        max_tokens=100,
+        top_p=1.0,
+        api_key="api_key",
+        openai_base_url=config_base_url,
    )
    llm = OpenAILLM(config)
    # Note: openai client will parse the raw base_url into a URL object, which will have a trailing slash
    assert str(llm.client.base_url) == config_base_url + "/"


-def test_generate_response_without_tools(mock_openai_client):
+def test_generate_response(mock_openai_client):
    config = BaseLlmConfig(model="gpt-4o", temperature=0.7, max_tokens=100, top_p=1.0)
    llm = OpenAILLM(config)
    messages = [
@@ -48,7 +57,9 @@ def test_generate_response_without_tools(mock_openai_client):
    ]

    mock_response = Mock()
-    mock_response.choices = [Mock(message=Mock(content="I'm doing well, thank you for asking!"))]
+    mock_response.choices = [
+        Mock(message=Mock(content="I'm doing well, thank you for asking!"))
+    ]
    mock_openai_client.chat.completions.create.return_value = mock_response

    response = llm.generate_response(messages)
@@ -57,49 +68,3 @@ def test_generate_response_without_tools(mock_openai_client):
        model="gpt-4o", messages=messages, temperature=0.7, max_tokens=100, top_p=1.0
    )
    assert response == "I'm doing well, thank you for asking!"
-
-
-def test_generate_response_with_tools(mock_openai_client):
-    config = BaseLlmConfig(model="gpt-4o", temperature=0.7, max_tokens=100, top_p=1.0)
-    llm = OpenAILLM(config)
-    messages = [
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": "Add a new memory: Today is a sunny day."},
-    ]
-    tools = [
-        {
-            "type": "function",
-            "function": {
-                "name": "add_memory",
-                "description": "Add a memory",
-                "parameters": {
-                    "type": "object",
-                    "properties": {"data": {"type": "string", "description": "Data to add to memory"}},
-                    "required": ["data"],
-                },
-            },
-        }
-    ]
-
-    mock_response = Mock()
-    mock_message = Mock()
-    mock_message.content = "I've added the memory for you."
-
-    mock_tool_call = Mock()
-    mock_tool_call.function.name = "add_memory"
-    mock_tool_call.function.arguments = '{"data": "Today is a sunny day."}'
-
-    mock_message.tool_calls = [mock_tool_call]
-    mock_response.choices = [Mock(message=mock_message)]
-    mock_openai_client.chat.completions.create.return_value = mock_response
-
-    response = llm.generate_response(messages, tools=tools)
-
-    mock_openai_client.chat.completions.create.assert_called_once_with(
-        model="gpt-4o", messages=messages, temperature=0.7, max_tokens=100, top_p=1.0, tools=tools, tool_choice="auto"
-    )
-
-    assert response["content"] == "I've added the memory for you."
-    assert len(response["tool_calls"]) == 1
-    assert response["tool_calls"][0]["name"] == "add_memory"
-    assert response["tool_calls"][0]["arguments"] == {"data": "Today is a sunny day."}
--- a/tests/llms/test_together.py
+++ b/tests/llms/test_together.py
@@ -14,8 +14,13 @@ def mock_together_client():
        yield mock_client


-def test_generate_response_without_tools(mock_together_client):
-    config = BaseLlmConfig(model="mistralai/Mixtral-8x7B-Instruct-v0.1", temperature=0.7, max_tokens=100, top_p=1.0)
+def test_generate_response(mock_together_client):
+    config = BaseLlmConfig(
+        model="mistralai/Mixtral-8x7B-Instruct-v0.1",
+        temperature=0.7,
+        max_tokens=100,
+        top_p=1.0,
+    )
    llm = TogetherLLM(config)
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
@@ -23,64 +28,18 @@ def test_generate_response_without_tools(mock_together_client):
    ]

    mock_response = Mock()
-    mock_response.choices = [Mock(message=Mock(content="I'm doing well, thank you for asking!"))]
+    mock_response.choices = [
+        Mock(message=Mock(content="I'm doing well, thank you for asking!"))
+    ]
    mock_together_client.chat.completions.create.return_value = mock_response

    response = llm.generate_response(messages)

-    mock_together_client.chat.completions.create.assert_called_once_with(
-        model="mistralai/Mixtral-8x7B-Instruct-v0.1", messages=messages, temperature=0.7, max_tokens=100, top_p=1.0
-    )
-    assert response == "I'm doing well, thank you for asking!"
-
-
-def test_generate_response_with_tools(mock_together_client):
-    config = BaseLlmConfig(model="mistralai/Mixtral-8x7B-Instruct-v0.1", temperature=0.7, max_tokens=100, top_p=1.0)
-    llm = TogetherLLM(config)
-    messages = [
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": "Add a new memory: Today is a sunny day."},
-    ]
-    tools = [
-        {
-            "type": "function",
-            "function": {
-                "name": "add_memory",
-                "description": "Add a memory",
-                "parameters": {
-                    "type": "object",
-                    "properties": {"data": {"type": "string", "description": "Data to add to memory"}},
-                    "required": ["data"],
-                },
-            },
-        }
-    ]
-
-    mock_response = Mock()
-    mock_message = Mock()
-    mock_message.content = "I've added the memory for you."
-
-    mock_tool_call = Mock()
-    mock_tool_call.function.name = "add_memory"
-    mock_tool_call.function.arguments = '{"data": "Today is a sunny day."}'
-
-    mock_message.tool_calls = [mock_tool_call]
-    mock_response.choices = [Mock(message=mock_message)]
-    mock_together_client.chat.completions.create.return_value = mock_response
-
-    response = llm.generate_response(messages, tools=tools)
-
    mock_together_client.chat.completions.create.assert_called_once_with(
        model="mistralai/Mixtral-8x7B-Instruct-v0.1",
        messages=messages,
        temperature=0.7,
        max_tokens=100,
        top_p=1.0,
-        tools=tools,
-        tool_choice="auto",
    )
-
-    assert response["content"] == "I've added the memory for you."
-    assert len(response["tool_calls"]) == 1
-    assert response["tool_calls"][0]["name"] == "add_memory"
-    assert response["tool_calls"][0]["arguments"] == {"data": "Today is a sunny day."}
+    assert response == "I'm doing well, thank you for asking!"