Add sentence_transformers to the pip install line of the Makefile's
install_all target, and add mocked pytest modules for the Azure OpenAI,
HuggingFace, Ollama and OpenAI embedders under tests/embeddings/.

NOTE(review): line breaks and indentation were reconstructed from a flattened
copy of this patch. Added-line counts match the hunk headers (46/74/43/88);
Makefile recipe lines are assumed to be tab-indented -- confirm before applying.

diff --git a/Makefile b/Makefile
index 032be346..e905addd 100644
--- a/Makefile
+++ b/Makefile
@@ -12,7 +12,7 @@ install:
 
 install_all:
 	poetry install
-	poetry run pip install groq together boto3 litellm ollama
+	poetry run pip install groq together boto3 litellm ollama sentence_transformers
 
 # Format code with ruff
 format:
diff --git a/tests/embeddings/test_azure_openai_embeddings.py b/tests/embeddings/test_azure_openai_embeddings.py
new file mode 100644
index 00000000..3425ea48
--- /dev/null
+++ b/tests/embeddings/test_azure_openai_embeddings.py
@@ -0,0 +1,46 @@
+import pytest
+from unittest.mock import Mock, patch
+from mem0.embeddings.azure_openai import AzureOpenAIEmbedding
+from mem0.configs.embeddings.base import BaseEmbedderConfig
+
+
+@pytest.fixture
+def mock_openai_client():
+    with patch("mem0.embeddings.azure_openai.AzureOpenAI") as mock_openai:
+        mock_client = Mock()
+        mock_openai.return_value = mock_client
+        yield mock_client
+
+
+def test_embed_text(mock_openai_client):
+    config = BaseEmbedderConfig(model="text-embedding-ada-002")
+    embedder = AzureOpenAIEmbedding(config)
+
+    mock_embedding_response = Mock()
+    mock_embedding_response.data = [Mock(embedding=[0.1, 0.2, 0.3])]
+    mock_openai_client.embeddings.create.return_value = mock_embedding_response
+
+    text = "Hello, this is a test."
+    embedding = embedder.embed(text)
+
+    mock_openai_client.embeddings.create.assert_called_once_with(
+        input=["Hello, this is a test."], model="text-embedding-ada-002"
+    )
+    assert embedding == [0.1, 0.2, 0.3]
+
+
+def test_embed_text_with_newlines(mock_openai_client):
+    config = BaseEmbedderConfig(model="text-embedding-ada-002")
+    embedder = AzureOpenAIEmbedding(config)
+
+    mock_embedding_response = Mock()
+    mock_embedding_response.data = [Mock(embedding=[0.4, 0.5, 0.6])]
+    mock_openai_client.embeddings.create.return_value = mock_embedding_response
+
+    text = "Hello,\nthis is a test\nwith newlines."
+    embedding = embedder.embed(text)
+
+    mock_openai_client.embeddings.create.assert_called_once_with(
+        input=["Hello, this is a test with newlines."], model="text-embedding-ada-002"
+    )
+    assert embedding == [0.4, 0.5, 0.6]
diff --git a/tests/embeddings/test_huggingface_embeddings.py b/tests/embeddings/test_huggingface_embeddings.py
new file mode 100644
index 00000000..13a36b0c
--- /dev/null
+++ b/tests/embeddings/test_huggingface_embeddings.py
@@ -0,0 +1,74 @@
+import pytest
+from unittest.mock import Mock, patch
+from mem0.embeddings.huggingface import HuggingFaceEmbedding
+from mem0.configs.embeddings.base import BaseEmbedderConfig
+
+
+@pytest.fixture
+def mock_sentence_transformer():
+    with patch("mem0.embeddings.huggingface.SentenceTransformer") as mock_transformer:
+        mock_model = Mock()
+        mock_transformer.return_value = mock_model
+        yield mock_model
+
+
+def test_embed_default_model(mock_sentence_transformer):
+    config = BaseEmbedderConfig()
+    embedder = HuggingFaceEmbedding(config)
+
+    mock_sentence_transformer.encode.return_value = [0.1, 0.2, 0.3]
+    result = embedder.embed("Hello world")
+
+    mock_sentence_transformer.encode.assert_called_once_with("Hello world")
+
+    assert result == [0.1, 0.2, 0.3]
+
+
+def test_embed_custom_model(mock_sentence_transformer):
+    config = BaseEmbedderConfig(model="paraphrase-MiniLM-L6-v2")
+    embedder = HuggingFaceEmbedding(config)
+
+    mock_sentence_transformer.encode.return_value = [0.4, 0.5, 0.6]
+    result = embedder.embed("Custom model test")
+
+    mock_sentence_transformer.encode.assert_called_once_with("Custom model test")
+
+    assert result == [0.4, 0.5, 0.6]
+
+
+def test_embed_with_model_kwargs(mock_sentence_transformer):
+    config = BaseEmbedderConfig(
+        model="all-MiniLM-L6-v2", model_kwargs={"device": "cuda"}
+    )
+    embedder = HuggingFaceEmbedding(config)
+
+    mock_sentence_transformer.encode.return_value = [0.7, 0.8, 0.9]
+    result = embedder.embed("Test with device")
+
+    mock_sentence_transformer.encode.assert_called_once_with("Test with device")
+
+    assert result == [0.7, 0.8, 0.9]
+
+
+def test_embed_sets_embedding_dims(mock_sentence_transformer):
+    config = BaseEmbedderConfig()
+
+    mock_sentence_transformer.get_sentence_embedding_dimension.return_value = 384
+    embedder = HuggingFaceEmbedding(config)
+
+    assert embedder.config.embedding_dims == 384
+    mock_sentence_transformer.get_sentence_embedding_dimension.assert_called_once()
+
+
+def test_embed_with_custom_embedding_dims(mock_sentence_transformer):
+    config = BaseEmbedderConfig(model="all-mpnet-base-v2", embedding_dims=768)
+    embedder = HuggingFaceEmbedding(config)
+
+    mock_sentence_transformer.encode.return_value = [1.0, 1.1, 1.2]
+    result = embedder.embed("Custom embedding dims")
+
+    mock_sentence_transformer.encode.assert_called_once_with("Custom embedding dims")
+
+    assert embedder.config.embedding_dims == 768
+
+    assert result == [1.0, 1.1, 1.2]
diff --git a/tests/embeddings/test_ollama_embeddings.py b/tests/embeddings/test_ollama_embeddings.py
new file mode 100644
index 00000000..821eaecf
--- /dev/null
+++ b/tests/embeddings/test_ollama_embeddings.py
@@ -0,0 +1,43 @@
+import pytest
+from unittest.mock import Mock, patch
+from mem0.embeddings.ollama import OllamaEmbedding
+from mem0.configs.embeddings.base import BaseEmbedderConfig
+
+
+@pytest.fixture
+def mock_ollama_client():
+    with patch("mem0.embeddings.ollama.Client") as mock_ollama:
+        mock_client = Mock()
+        mock_client.list.return_value = {"models": [{"name": "nomic-embed-text"}]}
+        mock_ollama.return_value = mock_client
+        yield mock_client
+
+
+def test_embed_text(mock_ollama_client):
+    config = BaseEmbedderConfig(model="nomic-embed-text", embedding_dims=512)
+    embedder = OllamaEmbedding(config)
+
+    mock_response = {"embedding": [0.1, 0.2, 0.3, 0.4, 0.5]}
+    mock_ollama_client.embeddings.return_value = mock_response
+
+    text = "Sample text to embed."
+    embedding = embedder.embed(text)
+
+    mock_ollama_client.embeddings.assert_called_once_with(
+        model="nomic-embed-text", prompt=text
+    )
+
+    assert embedding == [0.1, 0.2, 0.3, 0.4, 0.5]
+
+
+def test_ensure_model_exists(mock_ollama_client):
+    config = BaseEmbedderConfig(model="nomic-embed-text", embedding_dims=512)
+    embedder = OllamaEmbedding(config)
+
+    mock_ollama_client.pull.assert_not_called()
+
+    mock_ollama_client.list.return_value = {"models": []}
+
+    embedder._ensure_model_exists()
+
+    mock_ollama_client.pull.assert_called_once_with("nomic-embed-text")
diff --git a/tests/embeddings/test_openai_embeddings.py b/tests/embeddings/test_openai_embeddings.py
new file mode 100644
index 00000000..875d5149
--- /dev/null
+++ b/tests/embeddings/test_openai_embeddings.py
@@ -0,0 +1,88 @@
+import pytest
+from unittest.mock import Mock, patch
+from mem0.embeddings.openai import OpenAIEmbedding
+from mem0.configs.embeddings.base import BaseEmbedderConfig
+
+
+@pytest.fixture
+def mock_openai_client():
+    with patch("mem0.embeddings.openai.OpenAI") as mock_openai:
+        mock_client = Mock()
+        mock_openai.return_value = mock_client
+        yield mock_client
+
+
+def test_embed_default_model(mock_openai_client):
+    config = BaseEmbedderConfig()
+    embedder = OpenAIEmbedding(config)
+    mock_response = Mock()
+    mock_response.data = [Mock(embedding=[0.1, 0.2, 0.3])]
+    mock_openai_client.embeddings.create.return_value = mock_response
+
+    result = embedder.embed("Hello world")
+
+    mock_openai_client.embeddings.create.assert_called_once_with(
+        input=["Hello world"], model="text-embedding-3-small"
+    )
+    assert result == [0.1, 0.2, 0.3]
+
+
+def test_embed_custom_model(mock_openai_client):
+    config = BaseEmbedderConfig(model="text-embedding-2-medium", embedding_dims=1024)
+    embedder = OpenAIEmbedding(config)
+    mock_response = Mock()
+    mock_response.data = [Mock(embedding=[0.4, 0.5, 0.6])]
+    mock_openai_client.embeddings.create.return_value = mock_response
+
+    result = embedder.embed("Test embedding")
+
+    mock_openai_client.embeddings.create.assert_called_once_with(
+        input=["Test embedding"], model="text-embedding-2-medium"
+    )
+    assert result == [0.4, 0.5, 0.6]
+
+
+def test_embed_removes_newlines(mock_openai_client):
+    config = BaseEmbedderConfig()
+    embedder = OpenAIEmbedding(config)
+    mock_response = Mock()
+    mock_response.data = [Mock(embedding=[0.7, 0.8, 0.9])]
+    mock_openai_client.embeddings.create.return_value = mock_response
+
+    result = embedder.embed("Hello\nworld")
+
+    mock_openai_client.embeddings.create.assert_called_once_with(
+        input=["Hello world"], model="text-embedding-3-small"
+    )
+    assert result == [0.7, 0.8, 0.9]
+
+
+def test_embed_without_api_key_env_var(mock_openai_client):
+    config = BaseEmbedderConfig(api_key="test_key")
+    embedder = OpenAIEmbedding(config)
+    mock_response = Mock()
+    mock_response.data = [Mock(embedding=[1.0, 1.1, 1.2])]
+    mock_openai_client.embeddings.create.return_value = mock_response
+
+    result = embedder.embed("Testing API key")
+
+    mock_openai_client.embeddings.create.assert_called_once_with(
+        input=["Testing API key"], model="text-embedding-3-small"
+    )
+    assert result == [1.0, 1.1, 1.2]
+
+
+def test_embed_uses_environment_api_key(mock_openai_client, monkeypatch):
+    monkeypatch.setenv("OPENAI_API_KEY", "env_key")
+    config = BaseEmbedderConfig()
+    embedder = OpenAIEmbedding(config)
+    mock_response = Mock()
+    mock_response.data = [Mock(embedding=[1.3, 1.4, 1.5])]
+    mock_openai_client.embeddings.create.return_value = mock_response
+
+    result = embedder.embed("Environment key test")
+
+    mock_openai_client.embeddings.create.assert_called_once_with(
+        input=["Environment key test"], model="text-embedding-3-small"
+    )
+    assert result == [1.3, 1.4, 1.5]