[feat]: Add support for XML file format (#757)
This commit is contained in:
62
tests/loaders/test_xml.py
Normal file
62
tests/loaders/test_xml.py
Normal file
@@ -0,0 +1,62 @@
|
||||
import tempfile
|
||||
|
||||
import pytest
|
||||
|
||||
from embedchain.loaders.xml import XmlLoader
|
||||
|
||||
# Taken from https://github.com/langchain-ai/langchain/blob/master/libs/langchain/tests/integration_tests/examples/factbook.xml
|
||||
# Sample document: a <factbook> root holding four <country> records, each with
# name, capital, leader and sport children.
# Literal ampersands are written as the &amp; entity so the document is
# well-formed XML; a parser decodes them back to "&" in the element text.
SAMPLE_XML = """<?xml version="1.0" encoding="UTF-8"?>
<factbook>
<country>
<name>United States</name>
<capital>Washington, DC</capital>
<leader>Joe Biden</leader>
<sport>Baseball</sport>
</country>
<country>
<name>Canada</name>
<capital>Ottawa</capital>
<leader>Justin Trudeau</leader>
<sport>Hockey</sport>
</country>
<country>
<name>France</name>
<capital>Paris</capital>
<leader>Emmanuel Macron</leader>
<sport>Soccer</sport>
</country>
<country>
<name>Trinidad &amp; Tobado</name>
<capital>Port of Spain</capital>
<leader>Keith Rowley</leader>
<sport>Track &amp; Field</sport>
</country>
</factbook>"""
|
||||
|
||||
|
||||
@pytest.mark.parametrize("xml", [SAMPLE_XML])
def test_load_data(xml: str):
    """
    Test XML loader

    Writes the XML document to a temporary file, loads it with XmlLoader and
    checks that a single entry is returned, that its content contains the
    name/capital/leader text of every country record, and that its metadata
    URL is the path of the file that was loaded.

    :param xml: XML document to write to disk and feed to the loader.
    """
    # Creating temporary XML file; written as UTF-8 explicitly because the
    # sample document declares that encoding, rather than relying on the
    # platform's default text encoding.
    with tempfile.NamedTemporaryFile(mode="w+", encoding="utf-8") as tmpfile:
        tmpfile.write(xml)

        # The loader is handed only the path, so buffered text must be
        # flushed to disk before it reads the file.
        tmpfile.flush()
        filename = tmpfile.name

        # Loading XML using XmlLoader. This has to happen inside the `with`
        # block: the temporary file is removed as soon as it is closed.
        loader = XmlLoader()
        result = loader.load_data(filename)
        data = result["data"]

        # Assertions
        assert len(data) == 1
        assert "United States Washington, DC Joe Biden" in data[0]["content"]
        assert "Canada Ottawa Justin Trudeau" in data[0]["content"]
        assert "France Paris Emmanuel Macron" in data[0]["content"]
        assert "Trinidad & Tobado Port of Spain Keith Rowley" in data[0]["content"]
        assert data[0]["meta_data"]["url"] == filename
|
||||
Reference in New Issue
Block a user