[feat]: Add openapi spec data loader (#818)

This commit is contained in:
Deven Patel
2023-10-25 14:19:13 -07:00
committed by GitHub
parent f2a5dc40ee
commit 797bb567c6
13 changed files with 212 additions and 0 deletions

View File

@@ -3,6 +3,7 @@ from embedchain.chunkers.docx_file import DocxFileChunker
from embedchain.chunkers.json import JSONChunker
from embedchain.chunkers.mdx import MdxChunker
from embedchain.chunkers.notion import NotionChunker
from embedchain.chunkers.openapi import OpenAPIChunker
from embedchain.chunkers.pdf_file import PdfFileChunker
from embedchain.chunkers.qna_pair import QnaPairChunker
from embedchain.chunkers.sitemap import SitemapChunker
@@ -29,6 +30,7 @@ chunker_common_config = {
XmlChunker: {"chunk_size": 500, "chunk_overlap": 50, "length_function": len},
YoutubeVideoChunker: {"chunk_size": 2000, "chunk_overlap": 0, "length_function": len},
JSONChunker: {"chunk_size": 1000, "chunk_overlap": 0, "length_function": len},
OpenAPIChunker: {"chunk_size": 1000, "chunk_overlap": 0, "length_function": len},
}

View File

@@ -39,6 +39,12 @@ class TestApp(unittest.TestCase):
def test_detect_datatype_local_docx(self):
    """A ``file://`` URI ending in ``.docx`` is detected as ``DataType.DOCX``."""
    source = "file:///home/user/document.docx"
    detected = detect_datatype(source)
    self.assertEqual(detected, DataType.DOCX)
def test_detect_data_type_json(self):
    """An ``https`` URL ending in ``.json`` is detected as ``DataType.JSON``."""
    source = "https://www.example.com/data.json"
    detected = detect_datatype(source)
    self.assertEqual(detected, DataType.JSON)
def test_detect_data_type_local_json(self):
    """A ``file://`` URI ending in ``.json`` is detected as ``DataType.JSON``."""
    source = "file:///home/user/data.json"
    detected = detect_datatype(source)
    self.assertEqual(detected, DataType.JSON)
@patch("os.path.isfile")
def test_detect_datatype_regular_filesystem_docx(self, mock_isfile):
with tempfile.NamedTemporaryFile(suffix=".docx", delete=True) as tmp:

View File

@@ -0,0 +1,26 @@
import pytest
from embedchain.loaders.openapi import OpenAPILoader
@pytest.fixture
def openapi_loader():
    """Provide a freshly constructed ``OpenAPILoader`` to each test that requests it."""
    loader = OpenAPILoader()
    return loader
def test_load_data(openapi_loader, mocker):
    """Check the ``load_data`` result for a two-key YAML file with I/O and hashing mocked.

    ``builtins.open`` is patched to serve a fixed two-line YAML document and
    ``hashlib.sha256`` is patched to return a stub whose ``hexdigest()`` yields
    ``"mock_hash"``, so the assertions depend neither on the filesystem nor on
    a real digest.
    """
    fake_file = mocker.mock_open(read_data="key1: value1\nkey2: value2")
    mocker.patch("builtins.open", fake_file)
    fake_digest = mocker.Mock(hexdigest=lambda: "mock_hash")
    mocker.patch("hashlib.sha256", return_value=fake_digest)

    file_path = "configs/openai_openapi.yaml"
    loaded = openapi_loader.load_data(file_path)

    # Expect one entry per key/value pair, with row numbers starting at 1.
    expected_contents = ("key1: value1", "key2: value2")
    expected_data = [
        {"content": text, "meta_data": {"url": file_path, "row": row}}
        for row, text in enumerate(expected_contents, start=1)
    ]

    assert loaded["doc_id"] == "mock_hash"
    assert loaded["data"] == expected_data