[Feature] Discourse Loader (#948)

Co-authored-by: Deven Patel <deven298@yahoo.com>
This commit is contained in:
Deven Patel
2023-11-13 16:39:11 -08:00
committed by GitHub
parent 919cc74e94
commit 95c0d47236
12 changed files with 324 additions and 4 deletions

View File

@@ -1,3 +1,4 @@
from embedchain.chunkers.discourse import DiscourseChunker
from embedchain.chunkers.docs_site import DocsSiteChunker
from embedchain.chunkers.docx_file import DocxFileChunker
from embedchain.chunkers.gmail import GmailChunker
@@ -37,6 +38,7 @@ chunker_common_config = {
GmailChunker: {"chunk_size": 1000, "chunk_overlap": 0, "length_function": len},
PostgresChunker: {"chunk_size": 1000, "chunk_overlap": 0, "length_function": len},
SlackChunker: {"chunk_size": 1000, "chunk_overlap": 0, "length_function": len},
DiscourseChunker: {"chunk_size": 1000, "chunk_overlap": 0, "length_function": len},
}

View File

@@ -4,7 +4,8 @@ from string import Template
from embedchain import App
from embedchain.config import AppConfig, BaseLlmConfig
from embedchain.helper.json_serializable import JSONSerializable, register_deserializable
from embedchain.helper.json_serializable import (JSONSerializable,
register_deserializable)
class TestJsonSerializable(unittest.TestCase):

View File

@@ -0,0 +1,118 @@
import pytest
import requests
from embedchain.loaders.discourse import DiscourseLoader
@pytest.fixture
def discourse_loader_config():
    """Provide a minimal loader configuration containing only the ``domain`` key."""
    config = {"domain": "https://example.com"}
    return config
@pytest.fixture
def discourse_loader(discourse_loader_config):
    """Build a ``DiscourseLoader`` from the shared ``discourse_loader_config`` fixture."""
    loader = DiscourseLoader(config=discourse_loader_config)
    return loader
def test_discourse_loader_init_with_valid_config():
    """A config carrying ``domain`` is accepted and exposed as ``loader.domain``."""
    expected_domain = "https://example.com"
    loader = DiscourseLoader(config={"domain": expected_domain})
    assert loader.domain == expected_domain
def test_discourse_loader_init_with_missing_config():
    """Constructing the loader with no arguments raises ``ValueError`` about the missing config."""
    expected_message = "DiscourseLoader requires a config"
    with pytest.raises(ValueError, match=expected_message):
        DiscourseLoader()
def test_discourse_loader_init_with_missing_domain():
    """A config without a ``domain`` key raises ``ValueError`` about the missing domain."""
    config_without_domain = {"another_key": "value"}
    expected_message = "DiscourseLoader requires a domain"
    with pytest.raises(ValueError, match=expected_message):
        DiscourseLoader(config=config_without_domain)
def test_discourse_loader_check_query_with_valid_query(discourse_loader):
    """A non-empty string query passes ``_check_query``; the test succeeds if nothing is raised."""
    query = "sample query"
    discourse_loader._check_query(query)
def test_discourse_loader_check_query_with_empty_query(discourse_loader):
    """An empty string query makes ``_check_query`` raise ``ValueError``."""
    expected_message = "DiscourseLoader requires a query"
    with pytest.raises(ValueError, match=expected_message):
        discourse_loader._check_query("")
def test_discourse_loader_check_query_with_invalid_query_type(discourse_loader):
    """A non-string query (here an ``int``) makes ``_check_query`` raise ``ValueError``."""
    expected_message = "DiscourseLoader requires a query"
    not_a_string = 123
    with pytest.raises(ValueError, match=expected_message):
        discourse_loader._check_query(not_a_string)
def test_discourse_loader_load_post_with_valid_post_id(discourse_loader, monkeypatch):
    """With a successful fake HTTP response, ``_load_post`` returns the post text and metadata."""

    class FakeResponse:
        """Stand-in for a successful ``requests`` response carrying a post body."""

        def json(self):
            return {"raw": "Sample post content"}

        def raise_for_status(self):
            # Successful response: nothing to raise.
            return None

    def fake_get(*args, **kwargs):
        return FakeResponse()

    # Replace the real network call so the test never leaves the process.
    monkeypatch.setattr(requests, "get", fake_get)

    result = discourse_loader._load_post(123)

    assert result["content"] == "Sample post content"
    assert "meta_data" in result
def test_discourse_loader_load_post_with_invalid_post_id(discourse_loader, monkeypatch):
    """When the fake response fails its status check, ``_load_post`` raises with that error text."""

    class FailingResponse:
        """Stand-in for a ``requests`` response whose status check fails."""

        def raise_for_status(self):
            raise requests.exceptions.RequestException("Test error")

    def fake_get(*args, **kwargs):
        return FailingResponse()

    # Replace the real network call so the failure is fully controlled by the test.
    monkeypatch.setattr(requests, "get", fake_get)

    with pytest.raises(Exception, match="Test error"):
        discourse_loader._load_post(123)
def test_discourse_loader_load_data_with_valid_query(discourse_loader, monkeypatch):
    """``load_data`` yields one entry per post id in the fake search result, each from ``_load_post``."""
    expected_content = "Sample post content"
    expected_meta = {
        "url": "https://example.com/posts/123.json",
        "created_at": "2021-01-01",
        "username": "test_user",
        "topic_slug": "test_topic",
        "score": 10,
    }

    class FakeSearchResponse:
        """Stand-in for a successful search response listing three post ids."""

        def json(self):
            return {"grouped_search_result": {"post_ids": [123, 456, 789]}}

        def raise_for_status(self):
            # Successful response: nothing to raise.
            return None

    def fake_get(*args, **kwargs):
        return FakeSearchResponse()

    def fake_load_post(*args, **kwargs):
        # Build a fresh payload on every call, as the per-post loader stub.
        return {"content": expected_content, "meta_data": dict(expected_meta)}

    monkeypatch.setattr(requests, "get", fake_get)
    # Stub the per-post fetch so only the search/aggregation path is exercised.
    monkeypatch.setattr(discourse_loader, "_load_post", fake_load_post)

    result = discourse_loader.load_data("sample query")

    entries = result["data"]
    assert len(entries) == 3

    first_entry = entries[0]
    assert first_entry["content"] == expected_content
    for key, value in expected_meta.items():
        assert first_entry["meta_data"][key] == value