Feature/vllm support (#2981)

2025-06-23 13:18:38 +05:30
parent 386d8b87ae
commit 89499aedbe
10 changed files with 430 additions and 1 deletions
--- a/tests/llms/test_vllm.py
+++ b/tests/llms/test_vllm.py
@@ -0,0 +1,80 @@
+from unittest.mock import Mock, patch
+
+import pytest
+
+from mem0.configs.llms.base import BaseLlmConfig
+from mem0.llms.vllm import VllmLLM
+
+
+@pytest.fixture
+def mock_vllm_client():
+    with patch("mem0.llms.vllm.OpenAI") as mock_openai:
+        mock_client = Mock()
+        mock_openai.return_value = mock_client
+        yield mock_client
+
+
+def test_generate_response_without_tools(mock_vllm_client):
+    config = BaseLlmConfig(model="Qwen/Qwen2.5-32B-Instruct", temperature=0.7, max_tokens=100, top_p=1.0)
+    llm = VllmLLM(config)
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "Hello, how are you?"},
+    ]
+
+    mock_response = Mock()
+    mock_response.choices = [Mock(message=Mock(content="I'm doing well, thank you for asking!"))]
+    mock_vllm_client.chat.completions.create.return_value = mock_response
+
+    response = llm.generate_response(messages)
+
+    mock_vllm_client.chat.completions.create.assert_called_once_with(
+        model="Qwen/Qwen2.5-32B-Instruct", messages=messages, temperature=0.7, max_tokens=100, top_p=1.0
+    )
+    assert response == "I'm doing well, thank you for asking!"
+
+
+def test_generate_response_with_tools(mock_vllm_client):
+    config = BaseLlmConfig(model="Qwen/Qwen2.5-32B-Instruct", temperature=0.7, max_tokens=100, top_p=1.0)
+    llm = VllmLLM(config)
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "Add a new memory: Today is a sunny day."},
+    ]
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "add_memory",
+                "description": "Add a memory",
+                "parameters": {
+                    "type": "object",
+                    "properties": {"data": {"type": "string", "description": "Data to add to memory"}},
+                    "required": ["data"],
+                },
+            },
+        }
+    ]
+
+    mock_response = Mock()
+    mock_message = Mock()
+    mock_message.content = "I've added the memory for you."
+
+    mock_tool_call = Mock()
+    mock_tool_call.function.name = "add_memory"
+    mock_tool_call.function.arguments = '{"data": "Today is a sunny day."}'
+
+    mock_message.tool_calls = [mock_tool_call]
+    mock_response.choices = [Mock(message=mock_message)]
+    mock_vllm_client.chat.completions.create.return_value = mock_response
+
+    response = llm.generate_response(messages, tools=tools)
+
+    mock_vllm_client.chat.completions.create.assert_called_once_with(
+        model="Qwen/Qwen2.5-32B-Instruct", messages=messages, temperature=0.7, max_tokens=100, top_p=1.0, tools=tools, tool_choice="auto"
+    )
+
+    assert response["content"] == "I've added the memory for you."
+    assert len(response["tool_calls"]) == 1
+    assert response["tool_calls"][0]["name"] == "add_memory"
+    assert response["tool_calls"][0]["arguments"] == {"data": "Today is a sunny day."}