[Feature] Google Drive Folder support as a data source (#1106)

This commit is contained in:
Joe Sleiman
2024-01-05 08:16:01 +02:00
committed by GitHub
parent 38ad57a22c
commit b4ec14382b
10 changed files with 185 additions and 2 deletions

View File

@@ -3,6 +3,7 @@ from embedchain.chunkers.discourse import DiscourseChunker
from embedchain.chunkers.docs_site import DocsSiteChunker
from embedchain.chunkers.docx_file import DocxFileChunker
from embedchain.chunkers.gmail import GmailChunker
from embedchain.chunkers.google_drive import GoogleDriveChunker
from embedchain.chunkers.json import JSONChunker
from embedchain.chunkers.mdx import MdxChunker
from embedchain.chunkers.notion import NotionChunker
@@ -41,6 +42,7 @@ chunker_common_config = {
SlackChunker: {"chunk_size": 1000, "chunk_overlap": 0, "length_function": len},
DiscourseChunker: {"chunk_size": 1000, "chunk_overlap": 0, "length_function": len},
CommonChunker: {"chunk_size": 2000, "chunk_overlap": 0, "length_function": len},
GoogleDriveChunker: {"chunk_size": 1000, "chunk_overlap": 0, "length_function": len},
}

View File

@@ -0,0 +1,37 @@
import pytest
from embedchain.loaders.google_drive import GoogleDriveLoader
@pytest.fixture
def google_drive_folder_loader():
    """Provide a ``GoogleDriveLoader`` constructed with no arguments for each test."""
    loader = GoogleDriveLoader()
    return loader
def test_load_data_invalid_drive_url(google_drive_folder_loader):
    """Check that a non-Drive URL makes ``load_data`` raise ``ValueError``.

    The expected message is compared literally: ``pytest.raises(match=...)``
    interprets its argument as a regular expression, so the text is passed
    through ``re.escape`` to stop ``.`` and other metacharacters in the URLs
    from matching arbitrary characters.
    """
    import re  # local import keeps this test self-contained

    mock_invalid_drive_url = "https://example.com"
    expected_message = (
        "The url provided https://example.com does not match a google drive folder url. Example "
        "drive url: https://drive.google.com/drive/u/0/folders/xxxx"
    )
    with pytest.raises(ValueError, match=re.escape(expected_message)):
        google_drive_folder_loader.load_data(mock_invalid_drive_url)
@pytest.mark.skip(reason="This test won't work unless google api credentials are properly setup.")
def test_load_data_incorrect_drive_url(google_drive_folder_loader):
    """Check that a well-formed folder URL with the id ``xxxx`` raises ``FileNotFoundError``.

    Skipped by default; per the skip reason it needs Google API credentials.
    """
    unknown_folder_url = "https://drive.google.com/drive/u/0/folders/xxxx"
    expecting_not_found = pytest.raises(
        FileNotFoundError,
        match="Unable to locate folder or files, check provided drive URL and try again",
    )
    with expecting_not_found:
        google_drive_folder_loader.load_data(unknown_folder_url)
@pytest.mark.skip(reason="This test won't work unless google api credentials are properly setup.")
def test_load_data(google_drive_folder_loader):
    """Check the top-level shape of the value returned by ``load_data``.

    Skipped by default; per the skip reason it needs Google API credentials.
    """
    # NOTE(review): "YOUR_VALID_URL" looks like a placeholder to be replaced
    # with a real folder URL before un-skipping -- confirm.
    folder_url = "YOUR_VALID_URL"
    loaded = google_drive_folder_loader.load_data(folder_url)

    for top_level_key in ("doc_id", "data"):
        assert top_level_key in loaded

    first_entry = loaded["data"][0]
    assert "content" in first_entry
    assert "meta_data" in first_entry