add test cases for embeddings (#1829)
This commit is contained in:
2
Makefile
2
Makefile
@@ -12,7 +12,7 @@ install:
|
|||||||
|
|
||||||
install_all:
|
install_all:
|
||||||
poetry install
|
poetry install
|
||||||
poetry run pip install groq together boto3 litellm ollama
|
poetry run pip install groq together boto3 litellm ollama sentence_transformers
|
||||||
|
|
||||||
# Format code with ruff
|
# Format code with ruff
|
||||||
format:
|
format:
|
||||||
|
|||||||
46
tests/embeddings/test_azure_openai_embeddings.py
Normal file
46
tests/embeddings/test_azure_openai_embeddings.py
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
import pytest
|
||||||
|
from unittest.mock import Mock, patch
|
||||||
|
from mem0.embeddings.azure_openai import AzureOpenAIEmbedding
|
||||||
|
from mem0.configs.embeddings.base import BaseEmbedderConfig
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def mock_openai_client():
    """Yield a ``Mock`` that stands in for the Azure OpenAI client.

    ``mem0.embeddings.azure_openai.AzureOpenAI`` is patched while the test
    runs, and calling the patched class returns the yielded mock.
    """
    client = Mock()
    with patch("mem0.embeddings.azure_openai.AzureOpenAI", return_value=client):
        yield client
|
||||||
|
|
||||||
|
|
||||||
|
def test_embed_text(mock_openai_client):
    """Embed a plain sentence and check the request and the returned vector.

    The client is expected to be called exactly once with the text wrapped
    in a single-element list and the configured model name.
    """
    embedder = AzureOpenAIEmbedding(
        BaseEmbedderConfig(model="text-embedding-ada-002")
    )

    # Canned response: one data item carrying the embedding vector.
    mock_openai_client.embeddings.create.return_value = Mock(
        data=[Mock(embedding=[0.1, 0.2, 0.3])]
    )

    vector = embedder.embed("Hello, this is a test.")

    mock_openai_client.embeddings.create.assert_called_once_with(
        input=["Hello, this is a test."],
        model="text-embedding-ada-002",
    )
    assert vector == [0.1, 0.2, 0.3]
|
||||||
|
|
||||||
|
|
||||||
|
def test_embed_text_with_newlines(mock_openai_client):
    """Check that newlines in the input reach the client as spaces.

    The text passed to ``embed`` contains two ``\\n`` characters; the request
    is expected to carry the same text with each newline replaced by a space.
    """
    embedder = AzureOpenAIEmbedding(
        BaseEmbedderConfig(model="text-embedding-ada-002")
    )

    # Canned response: one data item carrying the embedding vector.
    mock_openai_client.embeddings.create.return_value = Mock(
        data=[Mock(embedding=[0.4, 0.5, 0.6])]
    )

    vector = embedder.embed("Hello,\nthis is a test\nwith newlines.")

    mock_openai_client.embeddings.create.assert_called_once_with(
        input=["Hello, this is a test with newlines."],
        model="text-embedding-ada-002",
    )
    assert vector == [0.4, 0.5, 0.6]
|
||||||
74
tests/embeddings/test_huggingface_embeddings.py
Normal file
74
tests/embeddings/test_huggingface_embeddings.py
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
import pytest
|
||||||
|
from unittest.mock import Mock, patch
|
||||||
|
from mem0.embeddings.huggingface import HuggingFaceEmbedding
|
||||||
|
from mem0.configs.embeddings.base import BaseEmbedderConfig
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def mock_sentence_transformer():
    """Yield a ``Mock`` that stands in for the sentence-transformer model.

    ``mem0.embeddings.huggingface.SentenceTransformer`` is patched while the
    test runs, and calling the patched class returns the yielded mock.
    """
    model = Mock()
    with patch(
        "mem0.embeddings.huggingface.SentenceTransformer", return_value=model
    ):
        yield model
|
||||||
|
|
||||||
|
|
||||||
|
def test_embed_default_model(mock_sentence_transformer):
    """With a default config, ``embed`` hands the text to ``encode`` unchanged.

    The value returned by ``encode`` is expected to come back from ``embed``.
    """
    embedder = HuggingFaceEmbedding(BaseEmbedderConfig())
    encode = mock_sentence_transformer.encode
    encode.return_value = [0.1, 0.2, 0.3]

    vector = embedder.embed("Hello world")

    encode.assert_called_once_with("Hello world")
    assert vector == [0.1, 0.2, 0.3]
|
||||||
|
|
||||||
|
|
||||||
|
def test_embed_custom_model(mock_sentence_transformer):
    """With an explicit model name, ``embed`` still delegates to ``encode``.

    The text is expected to be passed through unchanged and the encoded
    vector returned as-is.
    """
    embedder = HuggingFaceEmbedding(
        BaseEmbedderConfig(model="paraphrase-MiniLM-L6-v2")
    )
    encode = mock_sentence_transformer.encode
    encode.return_value = [0.4, 0.5, 0.6]

    vector = embedder.embed("Custom model test")

    encode.assert_called_once_with("Custom model test")
    assert vector == [0.4, 0.5, 0.6]
|
||||||
|
|
||||||
|
|
||||||
|
def test_embed_with_model_kwargs(mock_sentence_transformer):
    """Embed with ``model_kwargs`` set on the config.

    Only the ``encode`` call and the returned vector are checked.
    """
    # NOTE(review): nothing here asserts how ``model_kwargs`` is forwarded to
    # the model constructor; only the ``encode`` path is covered.
    settings = BaseEmbedderConfig(
        model="all-MiniLM-L6-v2",
        model_kwargs={"device": "cuda"},
    )
    embedder = HuggingFaceEmbedding(settings)
    encode = mock_sentence_transformer.encode
    encode.return_value = [0.7, 0.8, 0.9]

    vector = embedder.embed("Test with device")

    encode.assert_called_once_with("Test with device")
    assert vector == [0.7, 0.8, 0.9]
|
||||||
|
|
||||||
|
|
||||||
|
def test_embed_sets_embedding_dims(mock_sentence_transformer):
    """Constructing the embedder fills in ``embedding_dims`` from the model.

    With no dimensions configured, ``config.embedding_dims`` is expected to
    equal the stubbed ``get_sentence_embedding_dimension`` value (384), and
    that method is expected to be called exactly once.
    """
    settings = BaseEmbedderConfig()
    dimension_lookup = mock_sentence_transformer.get_sentence_embedding_dimension
    # The stub must be in place before the embedder is constructed.
    dimension_lookup.return_value = 384

    embedder = HuggingFaceEmbedding(settings)

    assert embedder.config.embedding_dims == 384
    dimension_lookup.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
|
def test_embed_with_custom_embedding_dims(mock_sentence_transformer):
    """An explicitly configured ``embedding_dims`` is kept after embedding.

    Also checks that ``embed`` passes the text to ``encode`` and returns the
    encoded vector.
    """
    embedder = HuggingFaceEmbedding(
        BaseEmbedderConfig(model="all-mpnet-base-v2", embedding_dims=768)
    )
    encode = mock_sentence_transformer.encode
    encode.return_value = [1.0, 1.1, 1.2]

    vector = embedder.embed("Custom embedding dims")

    encode.assert_called_once_with("Custom embedding dims")
    assert embedder.config.embedding_dims == 768
    assert vector == [1.0, 1.1, 1.2]
|
||||||
43
tests/embeddings/test_ollama_embeddings.py
Normal file
43
tests/embeddings/test_ollama_embeddings.py
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
import pytest
|
||||||
|
from unittest.mock import Mock, patch
|
||||||
|
from mem0.embeddings.ollama import OllamaEmbedding
|
||||||
|
from mem0.configs.embeddings.base import BaseEmbedderConfig
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def mock_ollama_client():
    """Yield a ``Mock`` that stands in for the Ollama client.

    ``mem0.embeddings.ollama.Client`` is patched while the test runs, and
    calling the patched class returns the yielded mock. Its ``list()`` is
    stubbed to report a single model named ``nomic-embed-text``.
    """
    client = Mock()
    client.list.return_value = {"models": [{"name": "nomic-embed-text"}]}
    with patch("mem0.embeddings.ollama.Client", return_value=client):
        yield client
|
||||||
|
|
||||||
|
|
||||||
|
def test_embed_text(mock_ollama_client):
    """Embed a sentence and check the request and the returned vector.

    The client's ``embeddings`` call is expected to receive the model name
    and the text as ``prompt``; the ``"embedding"`` entry of its response is
    expected to be returned by ``embed``.
    """
    embedder = OllamaEmbedding(
        BaseEmbedderConfig(model="nomic-embed-text", embedding_dims=512)
    )
    mock_ollama_client.embeddings.return_value = {
        "embedding": [0.1, 0.2, 0.3, 0.4, 0.5]
    }
    sample = "Sample text to embed."

    vector = embedder.embed(sample)

    mock_ollama_client.embeddings.assert_called_once_with(
        model="nomic-embed-text",
        prompt=sample,
    )
    assert vector == [0.1, 0.2, 0.3, 0.4, 0.5]
|
||||||
|
|
||||||
|
|
||||||
|
def test_ensure_model_exists(mock_ollama_client):
    """Check that a model is pulled only when the listing lacks it.

    After construction (listing contains the model) no pull is expected.
    Once the listing is emptied, ``_ensure_model_exists`` is expected to pull
    ``nomic-embed-text`` exactly once.
    """
    embedder = OllamaEmbedding(
        BaseEmbedderConfig(model="nomic-embed-text", embedding_dims=512)
    )
    pull = mock_ollama_client.pull

    # Nothing should have been pulled while the model was listed.
    pull.assert_not_called()

    # Empty the listing, then run the check again directly.
    mock_ollama_client.list.return_value = {"models": []}
    embedder._ensure_model_exists()

    pull.assert_called_once_with("nomic-embed-text")
|
||||||
88
tests/embeddings/test_openai_embeddings.py
Normal file
88
tests/embeddings/test_openai_embeddings.py
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
import pytest
|
||||||
|
from unittest.mock import Mock, patch
|
||||||
|
from mem0.embeddings.openai import OpenAIEmbedding
|
||||||
|
from mem0.configs.embeddings.base import BaseEmbedderConfig
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def mock_openai_client():
    """Yield a ``Mock`` that stands in for the OpenAI client.

    ``mem0.embeddings.openai.OpenAI`` is patched while the test runs, and
    calling the patched class returns the yielded mock.
    """
    client = Mock()
    with patch("mem0.embeddings.openai.OpenAI", return_value=client):
        yield client
|
||||||
|
|
||||||
|
|
||||||
|
def test_embed_default_model(mock_openai_client):
    """With no model configured, the request uses ``text-embedding-3-small``.

    The text is expected to be sent as a single-element list and the first
    data item's embedding returned.
    """
    embedder = OpenAIEmbedding(BaseEmbedderConfig())
    create = mock_openai_client.embeddings.create
    create.return_value = Mock(data=[Mock(embedding=[0.1, 0.2, 0.3])])

    vector = embedder.embed("Hello world")

    create.assert_called_once_with(
        input=["Hello world"],
        model="text-embedding-3-small",
    )
    assert vector == [0.1, 0.2, 0.3]
|
||||||
|
|
||||||
|
|
||||||
|
def test_embed_custom_model(mock_openai_client):
    """A model name set on the config is the one sent with the request."""
    settings = BaseEmbedderConfig(
        model="text-embedding-2-medium",
        embedding_dims=1024,
    )
    embedder = OpenAIEmbedding(settings)
    create = mock_openai_client.embeddings.create
    create.return_value = Mock(data=[Mock(embedding=[0.4, 0.5, 0.6])])

    vector = embedder.embed("Test embedding")

    create.assert_called_once_with(
        input=["Test embedding"],
        model="text-embedding-2-medium",
    )
    assert vector == [0.4, 0.5, 0.6]
|
||||||
|
|
||||||
|
|
||||||
|
def test_embed_removes_newlines(mock_openai_client):
    """A newline in the input is expected to reach the client as a space."""
    embedder = OpenAIEmbedding(BaseEmbedderConfig())
    create = mock_openai_client.embeddings.create
    create.return_value = Mock(data=[Mock(embedding=[0.7, 0.8, 0.9])])

    vector = embedder.embed("Hello\nworld")

    create.assert_called_once_with(
        input=["Hello world"],
        model="text-embedding-3-small",
    )
    assert vector == [0.7, 0.8, 0.9]
|
||||||
|
|
||||||
|
|
||||||
|
def test_embed_without_api_key_env_var(mock_openai_client):
    """Embed with an ``api_key`` supplied through the config.

    Checks the request sent to the client and the returned vector.
    """
    # NOTE(review): the arguments used to construct the client are not
    # asserted, and OPENAI_API_KEY is not cleared here, so key selection
    # itself is not verified by this test.
    embedder = OpenAIEmbedding(BaseEmbedderConfig(api_key="test_key"))
    create = mock_openai_client.embeddings.create
    create.return_value = Mock(data=[Mock(embedding=[1.0, 1.1, 1.2])])

    vector = embedder.embed("Testing API key")

    create.assert_called_once_with(
        input=["Testing API key"],
        model="text-embedding-3-small",
    )
    assert vector == [1.0, 1.1, 1.2]
|
||||||
|
|
||||||
|
|
||||||
|
def test_embed_uses_environment_api_key(mock_openai_client, monkeypatch):
    """Embed with ``OPENAI_API_KEY`` set in the environment and no config key.

    Checks the request sent to the client and the returned vector.
    """
    # NOTE(review): the arguments used to construct the client are not
    # asserted, so use of the environment key is not verified by this test.
    monkeypatch.setenv("OPENAI_API_KEY", "env_key")
    embedder = OpenAIEmbedding(BaseEmbedderConfig())
    create = mock_openai_client.embeddings.create
    create.return_value = Mock(data=[Mock(embedding=[1.3, 1.4, 1.5])])

    vector = embedder.embed("Environment key test")

    create.assert_called_once_with(
        input=["Environment key test"],
        model="text-embedding-3-small",
    )
    assert vector == [1.3, 1.4, 1.5]
|
||||||
Reference in New Issue
Block a user