Support for Excel files (#1319)
This commit is contained in:
@@ -2,6 +2,7 @@ from embedchain.chunkers.common_chunker import CommonChunker
|
||||
from embedchain.chunkers.discourse import DiscourseChunker
|
||||
from embedchain.chunkers.docs_site import DocsSiteChunker
|
||||
from embedchain.chunkers.docx_file import DocxFileChunker
|
||||
from embedchain.chunkers.excel_file import ExcelFileChunker
|
||||
from embedchain.chunkers.gmail import GmailChunker
|
||||
from embedchain.chunkers.google_drive import GoogleDriveChunker
|
||||
from embedchain.chunkers.json import JSONChunker
|
||||
@@ -43,6 +44,7 @@ chunker_common_config = {
|
||||
DiscourseChunker: {"chunk_size": 1000, "chunk_overlap": 0, "length_function": len},
|
||||
CommonChunker: {"chunk_size": 2000, "chunk_overlap": 0, "length_function": len},
|
||||
GoogleDriveChunker: {"chunk_size": 1000, "chunk_overlap": 0, "length_function": len},
|
||||
ExcelFileChunker: {"chunk_size": 1000, "chunk_overlap": 0, "length_function": len},
|
||||
}
|
||||
|
||||
|
||||
|
||||
33
tests/loaders/test_excel_file.py
Normal file
33
tests/loaders/test_excel_file.py
Normal file
@@ -0,0 +1,33 @@
|
||||
import hashlib
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from embedchain.loaders.excel_file import ExcelFileLoader
|
||||
|
||||
|
||||
@pytest.fixture
def excel_file_loader():
    """Build the ``ExcelFileLoader`` instance handed to tests that request this fixture."""
    loader = ExcelFileLoader()
    return loader
|
||||
|
||||
|
||||
def test_load_data(excel_file_loader):
    """Check the structure of the dictionary returned by ``load_data``.

    The expected result has a ``doc_id`` (SHA-256 hex digest of the content
    concatenated with the URL) and a ``data`` list whose entries carry
    ``content`` and ``meta_data["url"]``.

    NOTE(review): ``load_data`` is itself replaced by a stub for the duration
    of the call, so these assertions only verify the stubbed return value and
    do not exercise the loader's real implementation.
    """
    mock_url = "mock_excel_file.xlsx"
    expected_content = "Sample Excel Content"
    expected_doc_id = hashlib.sha256((expected_content + mock_url).encode()).hexdigest()

    # Canned payload that the patched ``load_data`` hands back.
    stubbed_result = {
        "doc_id": expected_doc_id,
        "data": [{"content": expected_content, "meta_data": {"url": mock_url}}],
    }

    # Mock the load_data method of the excel_file_loader instance
    with patch.object(excel_file_loader, "load_data", return_value=stubbed_result):
        result = excel_file_loader.load_data(mock_url)

    first_entry = result["data"][0]
    assert first_entry["content"] == expected_content
    assert first_entry["meta_data"]["url"] == mock_url
    assert result["doc_id"] == expected_doc_id
|
||||
Reference in New Issue
Block a user