DeepSeek Integration (#2173)
@@ -72,6 +72,7 @@ Here's the table based on the provided parameters:
| `ollama_base_url` | Base URL for Ollama API | Ollama |
| `openai_base_url` | Base URL for OpenAI API | OpenAI |
| `azure_kwargs` | Azure LLM args for initialization | AzureOpenAI |
| `deepseek_base_url` | Base URL for DeepSeek API | DeepSeek |

## Supported LLMs
docs/components/llms/models/deepseek.mdx (new file, 49 lines)
@@ -0,0 +1,49 @@
---
title: DeepSeek
---

To use DeepSeek LLM models, set the `DEEPSEEK_API_KEY` environment variable. You can optionally set `DEEPSEEK_API_BASE` if you need to use a different API endpoint (defaults to `https://api.deepseek.com`).

## Usage

```python
import os
from mem0 import Memory

os.environ["DEEPSEEK_API_KEY"] = "your-api-key"
os.environ["OPENAI_API_KEY"] = "your-api-key"  # for the embedder model

config = {
    "llm": {
        "provider": "deepseek",
        "config": {
            "model": "deepseek-chat",  # default model
            "temperature": 0.2,
            "max_tokens": 1500,
            "top_p": 1.0
        }
    }
}

m = Memory.from_config(config)
m.add("Likes to play cricket on weekends", user_id="alice", metadata={"category": "hobbies"})
```

You can also configure the API base URL in the config:

```python
config = {
    "llm": {
        "provider": "deepseek",
        "config": {
            "model": "deepseek-chat",
            "deepseek_base_url": "https://your-custom-endpoint.com",
            "api_key": "your-api-key"  # alternative to using the environment variable
        }
    }
}
```

## Config

All available parameters for the `deepseek` config are listed in the [Master List of All Params in Config](../config).
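As a quick reference, the DeepSeek-related options shown on this page can be combined in a single config. This is only a sketch of the keys used above, not the full list:

```python
config = {
    "llm": {
        "provider": "deepseek",
        "config": {
            "model": "deepseek-chat",
            "temperature": 0.2,
            "max_tokens": 1500,
            "top_p": 1.0,
            "api_key": "your-api-key",                        # or set DEEPSEEK_API_KEY
            "deepseek_base_url": "https://api.deepseek.com",  # or set DEEPSEEK_API_BASE
        }
    }
}
```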
@@ -24,6 +24,7 @@ To view all supported llms, visit the [Supported LLMs](./models).
<Card title="Google AI" href="/components/llms/models/google_ai"></Card>
<Card title="AWS bedrock" href="/components/llms/models/aws_bedrock"></Card>
<Card title="Gemini" href="/components/llms/models/gemini"></Card>
<Card title="DeepSeek" href="/components/llms/models/deepseek"></Card>
</CardGroup>

## Structured vs Unstructured Outputs

@@ -3,7 +3,7 @@ title: Overview
---

<Note type="info">
🎉 Exciting news! [CrewAI](https://crewai.com) now supports Mem0 for memory.
🎉 Exciting news! We have added support for [DeepSeek](https://deepseek.com).
</Note>

[Mem0](https://mem0.dev/wd) (pronounced "mem-zero") enhances AI assistants and agents with an intelligent memory layer, enabling personalized AI interactions. Mem0 remembers user preferences and traits and continuously updates over time, making it ideal for applications like customer support chatbots and AI assistants.

@@ -33,6 +33,8 @@ class BaseLlmConfig(ABC):
        azure_kwargs: Optional[AzureConfig] = {},
        # AzureOpenAI specific
        http_client_proxies: Optional[Union[Dict, str]] = None,
        # DeepSeek specific
        deepseek_base_url: Optional[str] = None,
    ):
        """
        Initializes a configuration class instance for the LLM.

@@ -69,6 +71,8 @@ class BaseLlmConfig(ABC):
        :type azure_kwargs: Optional[Dict[str, Any]], defaults to a dict inside init
        :param http_client_proxies: The proxy server(s) settings used to create self.http_client, defaults to None
        :type http_client_proxies: Optional[Dict | str], optional
        :param deepseek_base_url: DeepSeek base URL to be used, defaults to None
        :type deepseek_base_url: Optional[str], optional
        """

        self.model = model

@@ -92,5 +96,8 @@ class BaseLlmConfig(ABC):
        # Ollama specific
        self.ollama_base_url = ollama_base_url

        # DeepSeek specific
        self.deepseek_base_url = deepseek_base_url

        # AzureOpenAI specific
        self.azure_kwargs = AzureConfig(**azure_kwargs) or {}

@@ -22,6 +22,7 @@ class LlmConfig(BaseModel):
            "openai_structured",
            "azure_openai_structured",
            "gemini",
            "deepseek",
        ):
            return v
        else:

mem0/llms/deepseek.py (new file, 84 lines)
@@ -0,0 +1,84 @@
import json
import os
from typing import Dict, List, Optional

from openai import OpenAI

from mem0.configs.llms.base import BaseLlmConfig
from mem0.llms.base import LLMBase


class DeepSeekLLM(LLMBase):
    def __init__(self, config: Optional[BaseLlmConfig] = None):
        super().__init__(config)

        if not self.config.model:
            self.config.model = "deepseek-chat"

        api_key = self.config.api_key or os.getenv("DEEPSEEK_API_KEY")
        base_url = self.config.deepseek_base_url or os.getenv("DEEPSEEK_API_BASE") or "https://api.deepseek.com"
        self.client = OpenAI(api_key=api_key, base_url=base_url)

    def _parse_response(self, response, tools):
        """
        Process the response based on whether tools are used or not.

        Args:
            response: The raw response from the API.
            tools: The list of tools provided in the request.

        Returns:
            str or dict: The processed response.
        """
        if tools:
            processed_response = {
                "content": response.choices[0].message.content,
                "tool_calls": [],
            }

            if response.choices[0].message.tool_calls:
                for tool_call in response.choices[0].message.tool_calls:
                    processed_response["tool_calls"].append(
                        {
                            "name": tool_call.function.name,
                            "arguments": json.loads(tool_call.function.arguments),
                        }
                    )

            return processed_response
        else:
            return response.choices[0].message.content

    def generate_response(
        self,
        messages: List[Dict[str, str]],
        response_format=None,
        tools: Optional[List[Dict]] = None,
        tool_choice: str = "auto",
    ):
        """
        Generate a response based on the given messages using DeepSeek.

        Args:
            messages (list): List of message dicts containing 'role' and 'content'.
            response_format (str or object, optional): Format of the response. Defaults to "text".
            tools (list, optional): List of tools that the model can call. Defaults to None.
            tool_choice (str, optional): Tool choice method. Defaults to "auto".

        Returns:
            str: The generated response.
        """
        params = {
            "model": self.config.model,
            "messages": messages,
            "temperature": self.config.temperature,
            "max_tokens": self.config.max_tokens,
            "top_p": self.config.top_p,
        }

        if tools:
            params["tools"] = tools
            params["tool_choice"] = tool_choice

        response = self.client.chat.completions.create(**params)
        return self._parse_response(response, tools)
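For context, here is a minimal sketch (not part of this diff) of how `DeepSeekLLM` can be exercised directly; it assumes `DEEPSEEK_API_KEY` is exported in the environment and mirrors the defaults documented above:

```python
from mem0.configs.llms.base import BaseLlmConfig
from mem0.llms.deepseek import DeepSeekLLM

# Falls back to deepseek-chat at https://api.deepseek.com, with the API key
# read from DEEPSEEK_API_KEY (see __init__ above).
config = BaseLlmConfig(model="deepseek-chat", temperature=0.2, max_tokens=1500, top_p=1.0)
llm = DeepSeekLLM(config)
print(llm.generate_response([{"role": "user", "content": "Say hello."}]))
```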
@@ -23,6 +23,7 @@ class LlmFactory:
        "anthropic": "mem0.llms.anthropic.AnthropicLLM",
        "azure_openai_structured": "mem0.llms.azure_openai_structured.AzureOpenAIStructuredLLM",
        "gemini": "mem0.llms.gemini.GeminiLLM",
        "deepseek": "mem0.llms.deepseek.DeepSeekLLM",
    }

    @classmethod

tests/llms/test_deepseek.py (new file, 113 lines)
@@ -0,0 +1,113 @@
from unittest.mock import Mock, patch
import os
import pytest

from mem0.configs.llms.base import BaseLlmConfig
from mem0.llms.deepseek import DeepSeekLLM


@pytest.fixture
def mock_deepseek_client():
    with patch("mem0.llms.deepseek.OpenAI") as mock_openai:
        mock_client = Mock()
        mock_openai.return_value = mock_client
        yield mock_client


def test_deepseek_llm_base_url():
    # case 1: default config with the official DeepSeek base URL
    config = BaseLlmConfig(model="deepseek-chat", temperature=0.7, max_tokens=100, top_p=1.0, api_key="api_key")
    llm = DeepSeekLLM(config)
    assert str(llm.client.base_url) == "https://api.deepseek.com"

    # case 2: with the DEEPSEEK_API_BASE environment variable
    provider_base_url = "https://api.provider.com/v1/"
    os.environ["DEEPSEEK_API_BASE"] = provider_base_url
    config = BaseLlmConfig(model="deepseek-chat", temperature=0.7, max_tokens=100, top_p=1.0, api_key="api_key")
    llm = DeepSeekLLM(config)
    assert str(llm.client.base_url) == provider_base_url

    # case 3: with config.deepseek_base_url
    config_base_url = "https://api.config.com/v1/"
    config = BaseLlmConfig(
        model="deepseek-chat",
        temperature=0.7,
        max_tokens=100,
        top_p=1.0,
        api_key="api_key",
        deepseek_base_url=config_base_url,
    )
    llm = DeepSeekLLM(config)
    assert str(llm.client.base_url) == config_base_url


def test_generate_response_without_tools(mock_deepseek_client):
    config = BaseLlmConfig(model="deepseek-chat", temperature=0.7, max_tokens=100, top_p=1.0)
    llm = DeepSeekLLM(config)
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello, how are you?"},
    ]

    mock_response = Mock()
    mock_response.choices = [Mock(message=Mock(content="I'm doing well, thank you for asking!"))]
    mock_deepseek_client.chat.completions.create.return_value = mock_response

    response = llm.generate_response(messages)

    mock_deepseek_client.chat.completions.create.assert_called_once_with(
        model="deepseek-chat", messages=messages, temperature=0.7, max_tokens=100, top_p=1.0
    )
    assert response == "I'm doing well, thank you for asking!"


def test_generate_response_with_tools(mock_deepseek_client):
    config = BaseLlmConfig(model="deepseek-chat", temperature=0.7, max_tokens=100, top_p=1.0)
    llm = DeepSeekLLM(config)
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Add a new memory: Today is a sunny day."},
    ]
    tools = [
        {
            "type": "function",
            "function": {
                "name": "add_memory",
                "description": "Add a memory",
                "parameters": {
                    "type": "object",
                    "properties": {"data": {"type": "string", "description": "Data to add to memory"}},
                    "required": ["data"],
                },
            },
        }
    ]

    mock_response = Mock()
    mock_message = Mock()
    mock_message.content = "I've added the memory for you."

    mock_tool_call = Mock()
    mock_tool_call.function.name = "add_memory"
    mock_tool_call.function.arguments = '{"data": "Today is a sunny day."}'

    mock_message.tool_calls = [mock_tool_call]
    mock_response.choices = [Mock(message=mock_message)]
    mock_deepseek_client.chat.completions.create.return_value = mock_response

    response = llm.generate_response(messages, tools=tools)

    mock_deepseek_client.chat.completions.create.assert_called_once_with(
        model="deepseek-chat",
        messages=messages,
        temperature=0.7,
        max_tokens=100,
        top_p=1.0,
        tools=tools,
        tool_choice="auto",
    )

    assert response["content"] == "I've added the memory for you."
    assert len(response["tool_calls"]) == 1
    assert response["tool_calls"][0]["name"] == "add_memory"
    assert response["tool_calls"][0]["arguments"] == {"data": "Today is a sunny day."}