From 04bbad67ac2973a1ef5266289ddd65de1369ef6e Mon Sep 17 00:00:00 2001
From: Dev Khant
Date: Thu, 23 Jan 2025 17:45:03 +0530
Subject: [PATCH] DeepSeek Integration (#2173)

---
 docs/components/llms/config.mdx          |   1 +
 docs/components/llms/models/deepseek.mdx |  49 ++++++++++
 docs/components/llms/overview.mdx        |   1 +
 docs/overview.mdx                        |   2 +-
 mem0/configs/llms/base.py                |   7 ++
 mem0/llms/configs.py                     |   1 +
 mem0/llms/deepseek.py                    |  84 +++++++++++++++++
 mem0/utils/factory.py                    |   1 +
 tests/llms/test_deepseek.py              | 115 +++++++++++++++++++++++
 9 files changed, 260 insertions(+), 1 deletion(-)
 create mode 100644 docs/components/llms/models/deepseek.mdx
 create mode 100644 mem0/llms/deepseek.py
 create mode 100644 tests/llms/test_deepseek.py

diff --git a/docs/components/llms/config.mdx b/docs/components/llms/config.mdx
index 626304dc..573d306d 100644
--- a/docs/components/llms/config.mdx
+++ b/docs/components/llms/config.mdx
@@ -72,6 +72,7 @@ Here's the table based on the provided parameters:
 | `ollama_base_url` | Base URL for Ollama API | Ollama |
 | `openai_base_url` | Base URL for OpenAI API | OpenAI |
 | `azure_kwargs` | Azure LLM args for initialization | AzureOpenAI |
+| `deepseek_base_url` | Base URL for DeepSeek API | DeepSeek |
 
 ## Supported LLMs
diff --git a/docs/components/llms/models/deepseek.mdx b/docs/components/llms/models/deepseek.mdx
new file mode 100644
index 00000000..65626c11
--- /dev/null
+++ b/docs/components/llms/models/deepseek.mdx
@@ -0,0 +1,49 @@
+---
+title: DeepSeek
+---
+
+To use DeepSeek LLM models, set the `DEEPSEEK_API_KEY` environment variable. You can optionally set `DEEPSEEK_API_BASE` to use a different API endpoint (it defaults to `https://api.deepseek.com`).
+
+## Usage
+
+```python
+import os
+from mem0 import Memory
+
+os.environ["DEEPSEEK_API_KEY"] = "your-api-key"
+os.environ["OPENAI_API_KEY"] = "your-api-key"  # for the embedder model
+
+config = {
+    "llm": {
+        "provider": "deepseek",
+        "config": {
+            "model": "deepseek-chat",  # default model
+            "temperature": 0.2,
+            "max_tokens": 1500,
+            "top_p": 1.0
+        }
+    }
+}
+
+m = Memory.from_config(config)
+m.add("Likes to play cricket on weekends", user_id="alice", metadata={"category": "hobbies"})
+```
+
+You can also configure the API base URL in the config:
+
+```python
+config = {
+    "llm": {
+        "provider": "deepseek",
+        "config": {
+            "model": "deepseek-chat",
+            "deepseek_base_url": "https://your-custom-endpoint.com",
+            "api_key": "your-api-key"  # alternative to using the environment variable
+        }
+    }
+}
+```
+
+## Config
+
+All available parameters for the `deepseek` config are listed in the [Master List of All Params in Config](../config).
\ No newline at end of file
diff --git a/docs/components/llms/overview.mdx b/docs/components/llms/overview.mdx
index d4a3c415..30bbab68 100644
--- a/docs/components/llms/overview.mdx
+++ b/docs/components/llms/overview.mdx
@@ -24,6 +24,7 @@ To view all supported llms, visit the [Supported LLMs](./models).
+  <Card title="DeepSeek" href="/components/llms/models/deepseek"></Card>
 
 ## Structured vs Unstructured Outputs
diff --git a/docs/overview.mdx b/docs/overview.mdx
index 119ef47e..5399ef9b 100644
--- a/docs/overview.mdx
+++ b/docs/overview.mdx
@@ -3,7 +3,7 @@ title: Overview
 ---
 
-  🎉 Exciting news! [CrewAI](https://crewai.com) now supports Mem0 for memory.
+  🎉 Exciting news! We have added support for [DeepSeek](https://deepseek.com).
 
 [Mem0](https://mem0.dev/wd) (pronounced "mem-zero") enhances AI assistants and agents with an intelligent memory layer, enabling personalized AI interactions.
 Mem0 remembers user preferences and traits and continuously updates over time, making it ideal for applications like customer support chatbots and AI assistants.
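Beyond the `Memory.from_config` path shown in the docs above, the provider class added below in `mem0/llms/deepseek.py` can be driven directly, which is exactly what the tests at the end of this patch do. A minimal sketch, assuming `DEEPSEEK_API_KEY` is set in the environment and with an illustrative message list:

```python
from mem0.configs.llms.base import BaseLlmConfig
from mem0.llms.deepseek import DeepSeekLLM

# Same parameters as the docs example above; the constructor reads
# DEEPSEEK_API_KEY from the environment when api_key is not given.
llm = DeepSeekLLM(BaseLlmConfig(model="deepseek-chat", temperature=0.2, max_tokens=1500, top_p=1.0))

reply = llm.generate_response(
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say hello."},
    ]
)
print(reply)  # a plain string, since no tools were passed
```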
diff --git a/mem0/configs/llms/base.py b/mem0/configs/llms/base.py
index f9d63485..78ad13b1 100644
--- a/mem0/configs/llms/base.py
+++ b/mem0/configs/llms/base.py
@@ -33,6 +33,8 @@ class BaseLlmConfig(ABC):
         azure_kwargs: Optional[AzureConfig] = {},  # AzureOpenAI specific
         http_client_proxies: Optional[Union[Dict, str]] = None,
+        # DeepSeek specific
+        deepseek_base_url: Optional[str] = None,
     ):
         """
         Initializes a configuration class instance for the LLM.
@@ -69,6 +71,8 @@ class BaseLlmConfig(ABC):
         :type azure_kwargs: Optional[Dict[str, Any]], defaults a dict inside init
         :param http_client_proxies: The proxy server(s) settings used to create self.http_client, defaults to None
         :type http_client_proxies: Optional[Dict | str], optional
+        :param deepseek_base_url: DeepSeek base URL to be used, defaults to None
+        :type deepseek_base_url: Optional[str], optional
         """
 
         self.model = model
@@ -92,5 +96,8 @@ class BaseLlmConfig(ABC):
         # Ollama specific
         self.ollama_base_url = ollama_base_url
 
+        # DeepSeek specific
+        self.deepseek_base_url = deepseek_base_url
+
         # AzureOpenAI specific
         self.azure_kwargs = AzureConfig(**azure_kwargs) or {}
diff --git a/mem0/llms/configs.py b/mem0/llms/configs.py
index 2caaf12d..5a806903 100644
--- a/mem0/llms/configs.py
+++ b/mem0/llms/configs.py
@@ -22,6 +22,7 @@ class LlmConfig(BaseModel):
             "openai_structured",
             "azure_openai_structured",
             "gemini",
+            "deepseek",
         ):
             return v
         else:
diff --git a/mem0/llms/deepseek.py b/mem0/llms/deepseek.py
new file mode 100644
index 00000000..46a805f0
--- /dev/null
+++ b/mem0/llms/deepseek.py
@@ -0,0 +1,84 @@
+import json
+import os
+from typing import Dict, List, Optional
+
+from openai import OpenAI
+
+from mem0.configs.llms.base import BaseLlmConfig
+from mem0.llms.base import LLMBase
+
+
+class DeepSeekLLM(LLMBase):
+    def __init__(self, config: Optional[BaseLlmConfig] = None):
+        super().__init__(config)
+
+        if not self.config.model:
+            self.config.model = "deepseek-chat"
+
+        api_key = self.config.api_key or os.getenv("DEEPSEEK_API_KEY")
+        base_url = self.config.deepseek_base_url or os.getenv("DEEPSEEK_API_BASE") or "https://api.deepseek.com"
+        self.client = OpenAI(api_key=api_key, base_url=base_url)
+
+    def _parse_response(self, response, tools):
+        """
+        Process the response based on whether tools are used or not.
+
+        Args:
+            response: The raw response from the API.
+            tools: The list of tools provided in the request.
+
+        Returns:
+            str or dict: The processed response.
+        """
+        if tools:
+            processed_response = {
+                "content": response.choices[0].message.content,
+                "tool_calls": [],
+            }
+
+            if response.choices[0].message.tool_calls:
+                for tool_call in response.choices[0].message.tool_calls:
+                    processed_response["tool_calls"].append(
+                        {
+                            "name": tool_call.function.name,
+                            "arguments": json.loads(tool_call.function.arguments),
+                        }
+                    )
+
+            return processed_response
+        else:
+            return response.choices[0].message.content
+
+    def generate_response(
+        self,
+        messages: List[Dict[str, str]],
+        response_format=None,
+        tools: Optional[List[Dict]] = None,
+        tool_choice: str = "auto",
+    ):
+        """
+        Generate a response based on the given messages using DeepSeek.
+
+        Args:
+            messages (list): List of message dicts containing 'role' and 'content'.
+            response_format (str or object, optional): Accepted for interface parity but not currently forwarded to the API; defaults to None.
+            tools (list, optional): List of tools that the model can call. Defaults to None.
+            tool_choice (str, optional): Tool choice method. Defaults to "auto".
+
+        Returns:
+            str or dict: The generated response; a dict with "content" and "tool_calls" when tools are provided.
+        """
+        params = {
+            "model": self.config.model,
+            "messages": messages,
+            "temperature": self.config.temperature,
+            "max_tokens": self.config.max_tokens,
+            "top_p": self.config.top_p,
+        }
+
+        if tools:
+            params["tools"] = tools
+            params["tool_choice"] = tool_choice
+
+        response = self.client.chat.completions.create(**params)
+        return self._parse_response(response, tools)
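The constructor above resolves the client base URL with a fixed precedence: an explicit `deepseek_base_url` in the config, then the `DEEPSEEK_API_BASE` environment variable, then the public endpoint. A minimal stdlib sketch of that fallback chain (the helper name `resolve_base_url` is illustrative, not part of the patch):

```python
import os
from typing import Optional


def resolve_base_url(config_url: Optional[str] = None) -> str:
    # Mirrors DeepSeekLLM.__init__ above: config value first, then the
    # DEEPSEEK_API_BASE environment variable, then the public endpoint.
    return config_url or os.getenv("DEEPSEEK_API_BASE") or "https://api.deepseek.com"


assert resolve_base_url("https://proxy.example/v1") == "https://proxy.example/v1"

os.environ["DEEPSEEK_API_BASE"] = "https://env.example/v1"
assert resolve_base_url() == "https://env.example/v1"

os.environ.pop("DEEPSEEK_API_BASE")
assert resolve_base_url() == "https://api.deepseek.com"
```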
diff --git a/mem0/utils/factory.py b/mem0/utils/factory.py
index 0489bfa6..4ff9d15e 100644
--- a/mem0/utils/factory.py
+++ b/mem0/utils/factory.py
@@ -23,6 +23,7 @@ class LlmFactory:
         "anthropic": "mem0.llms.anthropic.AnthropicLLM",
         "azure_openai_structured": "mem0.llms.azure_openai_structured.AzureOpenAIStructuredLLM",
         "gemini": "mem0.llms.gemini.GeminiLLM",
+        "deepseek": "mem0.llms.deepseek.DeepSeekLLM",
     }
 
     @classmethod
diff --git a/tests/llms/test_deepseek.py b/tests/llms/test_deepseek.py
new file mode 100644
index 00000000..47e60cd3
--- /dev/null
+++ b/tests/llms/test_deepseek.py
@@ -0,0 +1,115 @@
+from unittest.mock import Mock, patch
+import os
+import pytest
+
+from mem0.configs.llms.base import BaseLlmConfig
+from mem0.llms.deepseek import DeepSeekLLM
+
+
+@pytest.fixture
+def mock_deepseek_client():
+    with patch("mem0.llms.deepseek.OpenAI") as mock_openai:
+        mock_client = Mock()
+        mock_openai.return_value = mock_client
+        yield mock_client
+
+
+def test_deepseek_llm_base_url():
+    # case1: default config with the official DeepSeek base URL
+    config = BaseLlmConfig(model="deepseek-chat", temperature=0.7, max_tokens=100, top_p=1.0, api_key="api_key")
+    llm = DeepSeekLLM(config)
+    assert str(llm.client.base_url) == "https://api.deepseek.com"
+
+    # case2: with env variable DEEPSEEK_API_BASE
+    provider_base_url = "https://api.provider.com/v1/"
+    os.environ["DEEPSEEK_API_BASE"] = provider_base_url
+    config = BaseLlmConfig(model="deepseek-chat", temperature=0.7, max_tokens=100, top_p=1.0, api_key="api_key")
+    llm = DeepSeekLLM(config)
+    assert str(llm.client.base_url) == provider_base_url
+
+    # case3: config.deepseek_base_url wins even though DEEPSEEK_API_BASE is still set
+    config_base_url = "https://api.config.com/v1/"
+    config = BaseLlmConfig(
+        model="deepseek-chat",
+        temperature=0.7,
+        max_tokens=100,
+        top_p=1.0,
+        api_key="api_key",
+        deepseek_base_url=config_base_url
+    )
+    llm = DeepSeekLLM(config)
+    assert str(llm.client.base_url) == config_base_url
+
+    # clean up so the env var does not leak into other tests
+    os.environ.pop("DEEPSEEK_API_BASE", None)
+
+
+def test_generate_response_without_tools(mock_deepseek_client):
+    config = BaseLlmConfig(model="deepseek-chat", temperature=0.7, max_tokens=100, top_p=1.0)
+    llm = DeepSeekLLM(config)
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "Hello, how are you?"},
+    ]
+
+    mock_response = Mock()
+    mock_response.choices = [Mock(message=Mock(content="I'm doing well, thank you for asking!"))]
+    mock_deepseek_client.chat.completions.create.return_value = mock_response
+
+    response = llm.generate_response(messages)
+
+    mock_deepseek_client.chat.completions.create.assert_called_once_with(
+        model="deepseek-chat", messages=messages, temperature=0.7, max_tokens=100, top_p=1.0
+    )
+    assert response == "I'm doing well, thank you for asking!"
+
+
+def test_generate_response_with_tools(mock_deepseek_client):
+    config = BaseLlmConfig(model="deepseek-chat", temperature=0.7, max_tokens=100, top_p=1.0)
+    llm = DeepSeekLLM(config)
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "Add a new memory: Today is a sunny day."},
+    ]
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "add_memory",
+                "description": "Add a memory",
+                "parameters": {
+                    "type": "object",
+                    "properties": {"data": {"type": "string", "description": "Data to add to memory"}},
+                    "required": ["data"],
+                },
+            },
+        }
+    ]
+
+    mock_response = Mock()
+    mock_message = Mock()
+    mock_message.content = "I've added the memory for you."
+
+    mock_tool_call = Mock()
+    mock_tool_call.function.name = "add_memory"
+    mock_tool_call.function.arguments = '{"data": "Today is a sunny day."}'
+
+    mock_message.tool_calls = [mock_tool_call]
+    mock_response.choices = [Mock(message=mock_message)]
+    mock_deepseek_client.chat.completions.create.return_value = mock_response
+
+    response = llm.generate_response(messages, tools=tools)
+
+    mock_deepseek_client.chat.completions.create.assert_called_once_with(
+        model="deepseek-chat",
+        messages=messages,
+        temperature=0.7,
+        max_tokens=100,
+        top_p=1.0,
+        tools=tools,
+        tool_choice="auto"
+    )
+
+    assert response["content"] == "I've added the memory for you."
+    assert len(response["tool_calls"]) == 1
+    assert response["tool_calls"][0]["name"] == "add_memory"
+    assert response["tool_calls"][0]["arguments"] == {"data": "Today is a sunny day."}
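The final assertions above pin down the parsed shape returned when tools are supplied: a dict with `content` plus a list of `tool_calls`, each carrying a `name` and already-deserialized `arguments`. A short sketch of how a caller might dispatch that structure; the `add_memory` function and handler table here are illustrative stand-ins, not mem0 API:

```python
# Dispatch the dict that generate_response returns when tools are supplied.
def add_memory(data: str) -> str:
    return f"stored: {data}"


HANDLERS = {"add_memory": add_memory}  # tool name -> local callable

response = {
    "content": "I've added the memory for you.",
    "tool_calls": [{"name": "add_memory", "arguments": {"data": "Today is a sunny day."}}],
}

for call in response["tool_calls"]:
    # Arguments arrive as a dict (json.loads already applied by _parse_response).
    print(HANDLERS[call["name"]](**call["arguments"]))  # -> stored: Today is a sunny day.
```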