add test cases for embeddings (#1829)

This commit is contained in:
Anusha Kondam
2024-09-13 06:36:51 -05:00
committed by GitHub
parent 47a8e677e9
commit f9634b4bf3
5 changed files with 252 additions and 1 deletions

View File

@@ -12,7 +12,7 @@ install:
# install_all: run `poetry install`, then use `poetry run pip install` to add
# the extra packages listed on the pip line(s).
# NOTE(review): the two `pip install` lines below differ only by the trailing
# `sentence_transformers`; they look like the removed/added sides of a diff
# hunk whose +/- markers were lost -- confirm only the second is meant to stay.
install_all:
poetry install
poetry run pip install groq together boto3 litellm ollama
poetry run pip install groq together boto3 litellm ollama sentence_transformers
# Format code with ruff
format:

View File

@@ -0,0 +1,46 @@
import pytest
from unittest.mock import Mock, patch
from mem0.embeddings.azure_openai import AzureOpenAIEmbedding
from mem0.configs.embeddings.base import BaseEmbedderConfig
@pytest.fixture
def mock_openai_client():
    """Yield a ``Mock`` standing in for the Azure OpenAI client.

    While the fixture is active, ``mem0.embeddings.azure_openai.AzureOpenAI``
    is patched so that calling it returns the yielded mock.
    """
    client_double = Mock()
    with patch("mem0.embeddings.azure_openai.AzureOpenAI", return_value=client_double):
        yield client_double
def test_embed_text(mock_openai_client):
    """``embed`` sends the text as a one-item list and returns the mocked embedding."""
    vector = [0.1, 0.2, 0.3]
    sentence = "Hello, this is a test."

    embedder = AzureOpenAIEmbedding(BaseEmbedderConfig(model="text-embedding-ada-002"))
    create = mock_openai_client.embeddings.create
    # Hand the response its own copy so the expectation below stays independent.
    create.return_value = Mock(data=[Mock(embedding=list(vector))])

    result = embedder.embed(sentence)

    create.assert_called_once_with(
        input=["Hello, this is a test."], model="text-embedding-ada-002"
    )
    assert result == vector
def test_embed_text_with_newlines(mock_openai_client):
    """Newlines in the input are expected to reach the client as single spaces."""
    vector = [0.4, 0.5, 0.6]
    multiline = "Hello,\nthis is a test\nwith newlines."

    embedder = AzureOpenAIEmbedding(BaseEmbedderConfig(model="text-embedding-ada-002"))
    create = mock_openai_client.embeddings.create
    # Hand the response its own copy so the expectation below stays independent.
    create.return_value = Mock(data=[Mock(embedding=list(vector))])

    result = embedder.embed(multiline)

    create.assert_called_once_with(
        input=["Hello, this is a test with newlines."], model="text-embedding-ada-002"
    )
    assert result == vector

View File

@@ -0,0 +1,74 @@
import pytest
from unittest.mock import Mock, patch
from mem0.embeddings.huggingface import HuggingFaceEmbedding
from mem0.configs.embeddings.base import BaseEmbedderConfig
@pytest.fixture
def mock_sentence_transformer():
    """Yield a ``Mock`` standing in for the sentence-transformer model.

    While the fixture is active, ``mem0.embeddings.huggingface.SentenceTransformer``
    is patched so that calling it returns the yielded mock.
    """
    model_double = Mock()
    with patch("mem0.embeddings.huggingface.SentenceTransformer", return_value=model_double):
        yield model_double
def test_embed_default_model(mock_sentence_transformer):
    """With a default config, ``embed`` passes the text to ``encode`` and returns its output."""
    embedder = HuggingFaceEmbedding(BaseEmbedderConfig())
    encode = mock_sentence_transformer.encode
    encode.return_value = [0.1, 0.2, 0.3]

    output = embedder.embed("Hello world")

    encode.assert_called_once_with("Hello world")
    assert output == [0.1, 0.2, 0.3]
def test_embed_custom_model(mock_sentence_transformer):
    """With an explicit model name, ``embed`` still calls ``encode`` with the raw text."""
    embedder = HuggingFaceEmbedding(BaseEmbedderConfig(model="paraphrase-MiniLM-L6-v2"))
    encode = mock_sentence_transformer.encode
    encode.return_value = [0.4, 0.5, 0.6]

    output = embedder.embed("Custom model test")

    encode.assert_called_once_with("Custom model test")
    assert output == [0.4, 0.5, 0.6]
def test_embed_with_model_kwargs(mock_sentence_transformer):
    """Supplying ``model_kwargs`` in the config leaves the ``encode`` call unchanged.

    Only the ``encode`` call and the returned value are checked here; nothing
    in this test inspects how ``model_kwargs`` is consumed.
    """
    embedder_config = BaseEmbedderConfig(
        model="all-MiniLM-L6-v2",
        model_kwargs={"device": "cuda"},
    )
    embedder = HuggingFaceEmbedding(embedder_config)
    encode = mock_sentence_transformer.encode
    encode.return_value = [0.7, 0.8, 0.9]

    output = embedder.embed("Test with device")

    encode.assert_called_once_with("Test with device")
    assert output == [0.7, 0.8, 0.9]
def test_embed_sets_embedding_dims(mock_sentence_transformer):
    """Constructing the embedder copies the model's reported dimension into the config."""
    dimension_getter = mock_sentence_transformer.get_sentence_embedding_dimension
    # Must be configured before construction: the value is read during __init__.
    dimension_getter.return_value = 384

    embedder = HuggingFaceEmbedding(BaseEmbedderConfig())

    assert embedder.config.embedding_dims == 384
    dimension_getter.assert_called_once()
def test_embed_with_custom_embedding_dims(mock_sentence_transformer):
    """An explicit ``embedding_dims`` of 768 is still 768 on the config after embedding."""
    embedder_config = BaseEmbedderConfig(model="all-mpnet-base-v2", embedding_dims=768)
    embedder = HuggingFaceEmbedding(embedder_config)
    encode = mock_sentence_transformer.encode
    encode.return_value = [1.0, 1.1, 1.2]

    output = embedder.embed("Custom embedding dims")

    encode.assert_called_once_with("Custom embedding dims")
    assert embedder.config.embedding_dims == 768
    assert output == [1.0, 1.1, 1.2]

View File

@@ -0,0 +1,43 @@
import pytest
from unittest.mock import Mock, patch
from mem0.embeddings.ollama import OllamaEmbedding
from mem0.configs.embeddings.base import BaseEmbedderConfig
@pytest.fixture
def mock_ollama_client():
    """Yield a ``Mock`` standing in for the Ollama client.

    The mock's ``list()`` reports a single model named ``nomic-embed-text``.
    While the fixture is active, ``mem0.embeddings.ollama.Client`` is patched
    so that calling it returns the yielded mock.
    """
    client_double = Mock()
    client_double.list.return_value = {"models": [{"name": "nomic-embed-text"}]}
    with patch("mem0.embeddings.ollama.Client", return_value=client_double):
        yield client_double
def test_embed_text(mock_ollama_client):
    """``embed`` calls ``client.embeddings`` with model and prompt and returns the ``embedding`` value."""
    sample = "Sample text to embed."
    embedder = OllamaEmbedding(
        BaseEmbedderConfig(model="nomic-embed-text", embedding_dims=512)
    )
    mock_ollama_client.embeddings.return_value = {"embedding": [0.1, 0.2, 0.3, 0.4, 0.5]}

    vector = embedder.embed(sample)

    mock_ollama_client.embeddings.assert_called_once_with(
        model="nomic-embed-text", prompt=sample
    )
    assert vector == [0.1, 0.2, 0.3, 0.4, 0.5]
def test_ensure_model_exists(mock_ollama_client):
    """``pull`` is skipped when the model is listed and issued once when it is not."""
    embedder_config = BaseEmbedderConfig(model="nomic-embed-text", embedding_dims=512)
    embedder = OllamaEmbedding(embedder_config)

    # The fixture's list() already reports the model, so construction must not pull.
    pull = mock_ollama_client.pull
    pull.assert_not_called()

    # Report an empty model list and re-run the check explicitly.
    mock_ollama_client.list.return_value = {"models": []}
    embedder._ensure_model_exists()

    pull.assert_called_once_with("nomic-embed-text")

View File

@@ -0,0 +1,88 @@
import pytest
from unittest.mock import Mock, patch
from mem0.embeddings.openai import OpenAIEmbedding
from mem0.configs.embeddings.base import BaseEmbedderConfig
@pytest.fixture
def mock_openai_client():
    """Yield a ``Mock`` standing in for the OpenAI client.

    While the fixture is active, ``mem0.embeddings.openai.OpenAI`` is patched
    so that calling it returns the yielded mock.
    """
    client_double = Mock()
    with patch("mem0.embeddings.openai.OpenAI", return_value=client_double):
        yield client_double
def test_embed_default_model(mock_openai_client):
    """A default config is expected to embed with model ``text-embedding-3-small``."""
    vector = [0.1, 0.2, 0.3]

    embedder = OpenAIEmbedding(BaseEmbedderConfig())
    create = mock_openai_client.embeddings.create
    # Hand the response its own copy so the expectation below stays independent.
    create.return_value = Mock(data=[Mock(embedding=list(vector))])

    output = embedder.embed("Hello world")

    create.assert_called_once_with(
        input=["Hello world"], model="text-embedding-3-small"
    )
    assert output == vector
def test_embed_custom_model(mock_openai_client):
    """A model name set in the config is the one passed to ``embeddings.create``."""
    vector = [0.4, 0.5, 0.6]
    embedder_config = BaseEmbedderConfig(model="text-embedding-2-medium", embedding_dims=1024)

    embedder = OpenAIEmbedding(embedder_config)
    create = mock_openai_client.embeddings.create
    # Hand the response its own copy so the expectation below stays independent.
    create.return_value = Mock(data=[Mock(embedding=list(vector))])

    output = embedder.embed("Test embedding")

    create.assert_called_once_with(
        input=["Test embedding"], model="text-embedding-2-medium"
    )
    assert output == vector
def test_embed_removes_newlines(mock_openai_client):
    """A newline in the input is expected to reach the client as a single space."""
    vector = [0.7, 0.8, 0.9]

    embedder = OpenAIEmbedding(BaseEmbedderConfig())
    create = mock_openai_client.embeddings.create
    # Hand the response its own copy so the expectation below stays independent.
    create.return_value = Mock(data=[Mock(embedding=list(vector))])

    output = embedder.embed("Hello\nworld")

    create.assert_called_once_with(
        input=["Hello world"], model="text-embedding-3-small"
    )
    assert output == vector
def test_embed_without_api_key_env_var(mock_openai_client, monkeypatch):
    """``embed`` works when the API key comes from the config and the env var is unset.

    The OpenAI client is mocked, so this checks only the ``embeddings.create``
    call and the returned vector, not how the key is forwarded.
    """
    # Make the scenario match the test name even when the developer's shell or
    # the CI runner exports OPENAI_API_KEY; monkeypatch restores it afterwards.
    monkeypatch.delenv("OPENAI_API_KEY", raising=False)

    config = BaseEmbedderConfig(api_key="test_key")
    embedder = OpenAIEmbedding(config)

    mock_response = Mock()
    mock_response.data = [Mock(embedding=[1.0, 1.1, 1.2])]
    mock_openai_client.embeddings.create.return_value = mock_response

    result = embedder.embed("Testing API key")

    mock_openai_client.embeddings.create.assert_called_once_with(
        input=["Testing API key"], model="text-embedding-3-small"
    )
    assert result == [1.0, 1.1, 1.2]
def test_embed_uses_environment_api_key(mock_openai_client, monkeypatch):
    """``embed`` works with a default config when ``OPENAI_API_KEY`` is set in the environment.

    The OpenAI client is mocked, so this checks only the ``embeddings.create``
    call and the returned vector.
    """
    monkeypatch.setenv("OPENAI_API_KEY", "env_key")
    vector = [1.3, 1.4, 1.5]

    embedder = OpenAIEmbedding(BaseEmbedderConfig())
    create = mock_openai_client.embeddings.create
    # Hand the response its own copy so the expectation below stays independent.
    create.return_value = Mock(data=[Mock(embedding=list(vector))])

    output = embedder.embed("Environment key test")

    create.assert_called_once_with(
        input=["Environment key test"], model="text-embedding-3-small"
    )
    assert output == vector