[Feature] Gmail Loader (#841)

This commit is contained in:
Deven Patel
2023-10-27 18:05:08 -07:00
committed by GitHub
parent 78ec91a3a9
commit 68183e9dce
12 changed files with 354 additions and 91 deletions

View File

@@ -1,5 +1,6 @@
from embedchain.chunkers.docs_site import DocsSiteChunker
from embedchain.chunkers.docx_file import DocxFileChunker
from embedchain.chunkers.gmail import GmailChunker
from embedchain.chunkers.json import JSONChunker
from embedchain.chunkers.mdx import MdxChunker
from embedchain.chunkers.notion import NotionChunker
@@ -31,6 +32,7 @@ chunker_common_config = {
YoutubeVideoChunker: {"chunk_size": 2000, "chunk_overlap": 0, "length_function": len},
JSONChunker: {"chunk_size": 1000, "chunk_overlap": 0, "length_function": len},
OpenAPIChunker: {"chunk_size": 1000, "chunk_overlap": 0, "length_function": len},
GmailChunker: {"chunk_size": 1000, "chunk_overlap": 0, "length_function": len},
}

View File

@@ -0,0 +1,49 @@
import pytest
from llama_index.readers.schema.base import Document
from embedchain.loaders.gmail import GmailLoader
@pytest.fixture
def mock_download_loader(mocker):
    """Patch ``download_loader`` in ``embedchain.loaders.gmail``.

    Returns the patch's mock object so a test can configure what the
    patched callable returns.
    """
    patched_download_loader = mocker.patch("embedchain.loaders.gmail.download_loader")
    return patched_download_loader
@pytest.fixture
def mock_quopri(mocker):
    """Patch ``quopri.decodestring`` as referenced by the Gmail loader module.

    The patched function always returns a fixed bytes payload; the mock
    object is returned to the requesting test.
    """
    patched_decodestring = mocker.patch(
        "embedchain.loaders.gmail.quopri.decodestring",
        return_value=b"your_test_decoded_string",
    )
    return patched_decodestring
@pytest.fixture
def mock_beautifulsoup(mocker):
    """Patch ``BeautifulSoup`` in ``embedchain.loaders.gmail``.

    Calling the patched name yields a ``MagicMock`` instead of a real
    parser object; the patch's mock is returned to the requesting test.
    """
    fake_soup = mocker.MagicMock()
    patched_beautifulsoup = mocker.patch(
        "embedchain.loaders.gmail.BeautifulSoup",
        return_value=fake_soup,
    )
    return patched_beautifulsoup
@pytest.fixture
def gmail_loader(mock_download_loader, mock_quopri, mock_beautifulsoup):
    """Build a ``GmailLoader`` with the module-level patches in place.

    The three mock fixtures are requested only so that their patches are
    active before the loader is constructed; their values are not used here.
    """
    loader = GmailLoader()
    return loader
def test_load_data_file_not_found(gmail_loader, mocker):
    """``load_data`` raises ``FileNotFoundError`` when ``os.path.isfile`` is False."""
    # pytest-mock keeps the patch active until test teardown, so it must not be
    # wrapped in ``with``: doing so emits PytestMockWarning on current releases
    # (and raised ValueError on older ones).
    mocker.patch("os.path.isfile", return_value=False)

    # Only the call under test sits inside ``pytest.raises`` so that an error
    # raised while setting up the patch cannot satisfy the expectation.
    with pytest.raises(FileNotFoundError):
        gmail_loader.load_data("your_query")
def test_load_data(gmail_loader, mock_download_loader, mocker):
    """``load_data`` returns a mapping with a string ``doc_id`` and a list ``data``.

    The patched ``download_loader`` is configured to return a reader mock
    whose ``load_data`` yields a single ``Document``.
    """
    mock_gmail_reader_instance = mocker.MagicMock()
    text = "your_test_email_text"
    metadata = {
        "id": "your_test_id",
        "snippet": "your_test_snippet",
    }
    mock_gmail_reader_instance.load_data.return_value = [Document(text=text, extra_info=metadata)]
    mock_download_loader.return_value = mock_gmail_reader_instance

    # pytest-mock undoes the patch at teardown; using the returned mock as a
    # context manager is unnecessary and triggers PytestMockWarning.
    mocker.patch("os.path.isfile", return_value=True)
    response_data = gmail_loader.load_data("your_query")

    assert "doc_id" in response_data
    assert "data" in response_data
    assert isinstance(response_data["doc_id"], str)
    assert isinstance(response_data["data"], list)