Improve tests (#800)
This commit is contained in:
@@ -1,70 +1,49 @@
|
||||
import os
|
||||
import unittest
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from embedchain import App
|
||||
from embedchain.config import AddConfig, AppConfig, ChunkerConfig
|
||||
from embedchain.models.data_type import DataType
|
||||
|
||||
os.environ["OPENAI_API_KEY"] = "test_key"
|
||||
|
||||
class TestApp(unittest.TestCase):
|
||||
os.environ["OPENAI_API_KEY"] = "test_key"
|
||||
|
||||
def setUp(self):
|
||||
self.app = App(config=AppConfig(collect_metrics=False))
|
||||
@pytest.fixture
|
||||
def app(mocker):
|
||||
mocker.patch("chromadb.api.models.Collection.Collection.add")
|
||||
return App(config=AppConfig(collect_metrics=False))
|
||||
|
||||
@patch("chromadb.api.models.Collection.Collection.add", MagicMock)
|
||||
def test_add(self):
|
||||
"""
|
||||
This test checks the functionality of the 'add' method in the App class.
|
||||
It begins by simulating the addition of a web page with a specific URL to the application instance.
|
||||
The 'add' method is expected to append the input type and URL to the 'user_asks' attribute of the App instance.
|
||||
By asserting that 'user_asks' is updated correctly after the 'add' method is called, we can confirm that the
|
||||
method is working as intended.
|
||||
The Collection.add method from the chromadb library is mocked during this test to isolate the behavior of the
|
||||
'add' method.
|
||||
"""
|
||||
self.app.add("https://example.com", metadata={"meta": "meta-data"})
|
||||
self.assertEqual(self.app.user_asks, [["https://example.com", "web_page", {"meta": "meta-data"}]])
|
||||
|
||||
@patch("chromadb.api.models.Collection.Collection.add", MagicMock)
|
||||
def test_add_sitemap(self):
|
||||
"""
|
||||
In addition to the test_add function, this test checks that sitemaps can be added with the correct data type.
|
||||
"""
|
||||
self.app.add("https://www.google.com/sitemap.xml", metadata={"meta": "meta-data"})
|
||||
self.assertEqual(self.app.user_asks, [["https://www.google.com/sitemap.xml", "sitemap", {"meta": "meta-data"}]])
|
||||
def test_add(app):
|
||||
app.add("https://example.com", metadata={"meta": "meta-data"})
|
||||
assert app.user_asks == [["https://example.com", "web_page", {"meta": "meta-data"}]]
|
||||
|
||||
@patch("chromadb.api.models.Collection.Collection.add", MagicMock)
|
||||
def test_add_forced_type(self):
|
||||
"""
|
||||
Test that you can also force a data_type with `add`.
|
||||
"""
|
||||
data_type = "text"
|
||||
self.app.add("https://example.com", data_type=data_type, metadata={"meta": "meta-data"})
|
||||
self.assertEqual(self.app.user_asks, [["https://example.com", data_type, {"meta": "meta-data"}]])
|
||||
|
||||
@patch("chromadb.api.models.Collection.Collection.add", MagicMock)
|
||||
def test_dry_run(self):
|
||||
"""
|
||||
Test that if dry_run == True then data chunks are returned.
|
||||
"""
|
||||
def test_add_sitemap(app):
|
||||
app.add("https://www.google.com/sitemap.xml", metadata={"meta": "meta-data"})
|
||||
assert app.user_asks == [["https://www.google.com/sitemap.xml", "sitemap", {"meta": "meta-data"}]]
|
||||
|
||||
chunker_config = ChunkerConfig(chunk_size=1, chunk_overlap=0)
|
||||
# We can't test with lorem ipsum because chunks are deduped, so would be recurring characters.
|
||||
text = """0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"""
|
||||
|
||||
result = self.app.add(source=text, config=AddConfig(chunker=chunker_config), dry_run=True)
|
||||
def test_add_forced_type(app):
|
||||
data_type = "text"
|
||||
app.add("https://example.com", data_type=data_type, metadata={"meta": "meta-data"})
|
||||
assert app.user_asks == [["https://example.com", data_type, {"meta": "meta-data"}]]
|
||||
|
||||
chunks = result["chunks"]
|
||||
metadata = result["metadata"]
|
||||
count = result["count"]
|
||||
data_type = result["type"]
|
||||
|
||||
self.assertEqual(len(chunks), len(text))
|
||||
self.assertEqual(count, len(text))
|
||||
self.assertEqual(data_type, DataType.TEXT)
|
||||
for item in metadata:
|
||||
self.assertIsInstance(item, dict)
|
||||
self.assertIn(item["url"], "local")
|
||||
self.assertIn(item["data_type"], "text")
|
||||
def test_dry_run(app):
|
||||
chunker_config = ChunkerConfig(chunk_size=1, chunk_overlap=0)
|
||||
text = """0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"""
|
||||
|
||||
result = app.add(source=text, config=AddConfig(chunker=chunker_config), dry_run=True)
|
||||
|
||||
chunks = result["chunks"]
|
||||
metadata = result["metadata"]
|
||||
count = result["count"]
|
||||
data_type = result["type"]
|
||||
|
||||
assert len(chunks) == len(text)
|
||||
assert count == len(text)
|
||||
assert data_type == DataType.TEXT
|
||||
for item in metadata:
|
||||
assert isinstance(item, dict)
|
||||
assert "local" in item["url"]
|
||||
assert "text" in item["data_type"]
|
||||
|
||||
Reference in New Issue
Block a user