[Feature] Discourse Loader (#948)
Co-authored-by: Deven Patel <deven298@yahoo.com>
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
from embedchain.chunkers.discourse import DiscourseChunker
|
||||
from embedchain.chunkers.docs_site import DocsSiteChunker
|
||||
from embedchain.chunkers.docx_file import DocxFileChunker
|
||||
from embedchain.chunkers.gmail import GmailChunker
|
||||
@@ -37,6 +38,7 @@ chunker_common_config = {
|
||||
GmailChunker: {"chunk_size": 1000, "chunk_overlap": 0, "length_function": len},
|
||||
PostgresChunker: {"chunk_size": 1000, "chunk_overlap": 0, "length_function": len},
|
||||
SlackChunker: {"chunk_size": 1000, "chunk_overlap": 0, "length_function": len},
|
||||
DiscourseChunker: {"chunk_size": 1000, "chunk_overlap": 0, "length_function": len},
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -4,7 +4,8 @@ from string import Template
|
||||
|
||||
from embedchain import App
|
||||
from embedchain.config import AppConfig, BaseLlmConfig
|
||||
from embedchain.helper.json_serializable import JSONSerializable, register_deserializable
|
||||
from embedchain.helper.json_serializable import (JSONSerializable,
|
||||
register_deserializable)
|
||||
|
||||
|
||||
class TestJsonSerializable(unittest.TestCase):
|
||||
|
||||
118
tests/loaders/test_discourse.py
Normal file
118
tests/loaders/test_discourse.py
Normal file
@@ -0,0 +1,118 @@
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
from embedchain.loaders.discourse import DiscourseLoader
|
||||
|
||||
|
||||
@pytest.fixture
def discourse_loader_config():
    """Provide the minimal configuration dict used to construct a DiscourseLoader."""
    config = dict(domain="https://example.com")
    return config
|
||||
|
||||
|
||||
@pytest.fixture
def discourse_loader(discourse_loader_config):
    """Build a DiscourseLoader from the shared fixture configuration."""
    loader = DiscourseLoader(config=discourse_loader_config)
    return loader
|
||||
|
||||
|
||||
def test_discourse_loader_init_with_valid_config():
    """A config that carries a domain is accepted and exposed as ``loader.domain``."""
    expected_domain = "https://example.com"
    loader = DiscourseLoader(config={"domain": expected_domain})
    assert loader.domain == expected_domain
|
||||
|
||||
|
||||
def test_discourse_loader_init_with_missing_config():
    """Constructing the loader without any config must raise a ValueError."""
    expected_message = "DiscourseLoader requires a config"
    with pytest.raises(ValueError, match=expected_message):
        DiscourseLoader()
|
||||
|
||||
|
||||
def test_discourse_loader_init_with_missing_domain():
    """A config dict that lacks the ``domain`` key must raise a ValueError."""
    expected_message = "DiscourseLoader requires a domain"
    config_without_domain = {"another_key": "value"}
    with pytest.raises(ValueError, match=expected_message):
        DiscourseLoader(config=config_without_domain)
|
||||
|
||||
|
||||
def test_discourse_loader_check_query_with_valid_query(discourse_loader):
    """A non-empty string query passes validation; the test succeeds if nothing is raised."""
    query = "sample query"
    discourse_loader._check_query(query)
|
||||
|
||||
|
||||
def test_discourse_loader_check_query_with_empty_query(discourse_loader):
    """An empty string query must be rejected with a ValueError."""
    expected_message = "DiscourseLoader requires a query"
    empty_query = ""
    with pytest.raises(ValueError, match=expected_message):
        discourse_loader._check_query(empty_query)
|
||||
|
||||
|
||||
def test_discourse_loader_check_query_with_invalid_query_type(discourse_loader):
    """A non-string query (here an int) must be rejected with a ValueError."""
    expected_message = "DiscourseLoader requires a query"
    non_string_query = 123
    with pytest.raises(ValueError, match=expected_message):
        discourse_loader._check_query(non_string_query)
|
||||
|
||||
|
||||
def test_discourse_loader_load_post_with_valid_post_id(discourse_loader, monkeypatch):
    """With ``requests.get`` stubbed to succeed, ``_load_post`` returns content and meta data."""

    class FakeResponse:
        """Stand-in for a successful HTTP response carrying a post payload."""

        def raise_for_status(self):
            # Successful response: there is nothing to raise.
            return None

        def json(self):
            return {"raw": "Sample post content"}

    def fake_get(*args, **kwargs):
        # Ignore whatever URL/arguments the loader passes; always succeed.
        return FakeResponse()

    monkeypatch.setattr(requests, "get", fake_get)

    result = discourse_loader._load_post(123)

    assert result["content"] == "Sample post content"
    assert "meta_data" in result
|
||||
|
||||
|
||||
def test_discourse_loader_load_post_with_invalid_post_id(discourse_loader, monkeypatch):
    """When the stubbed response fails its status check, ``_load_post`` surfaces the error."""

    class FailingResponse:
        """Stand-in for an HTTP response whose status check fails."""

        def raise_for_status(self):
            raise requests.exceptions.RequestException("Test error")

    def fake_get(*args, **kwargs):
        # Ignore whatever URL/arguments the loader passes; always fail on status check.
        return FailingResponse()

    monkeypatch.setattr(requests, "get", fake_get)

    expected_message = "Test error"
    with pytest.raises(Exception, match=expected_message):
        discourse_loader._load_post(123)
|
||||
|
||||
|
||||
def test_discourse_loader_load_data_with_valid_query(discourse_loader, monkeypatch):
    """With search and post loading stubbed, ``load_data`` yields three entries of post data.

    ``requests.get`` is replaced by a stub whose JSON payload lists three post
    ids, and ``_load_post`` is replaced by a stub returning a fixed post dict.
    The first returned entry is then checked field by field.
    """

    class SearchResponse:
        """Stand-in for a successful search response listing three post ids."""

        def raise_for_status(self):
            # Successful response: there is nothing to raise.
            return None

        def json(self):
            return {"grouped_search_result": {"post_ids": [123, 456, 789]}}

    def fake_get(*args, **kwargs):
        return SearchResponse()

    monkeypatch.setattr(requests, "get", fake_get)

    expected_meta = {
        "url": "https://example.com/posts/123.json",
        "created_at": "2021-01-01",
        "username": "test_user",
        "topic_slug": "test_topic",
        "score": 10,
    }

    def fake_load_post(*args, **kwargs):
        # Hand out a fresh dict on every call so entries never share state.
        return {
            "content": "Sample post content",
            "meta_data": dict(expected_meta),
        }

    monkeypatch.setattr(discourse_loader, "_load_post", fake_load_post)

    data = discourse_loader.load_data("sample query")

    assert len(data["data"]) == 3
    first_entry = data["data"][0]
    assert first_entry["content"] == "Sample post content"
    # Same fields, same order as the stubbed meta data.
    for key, expected_value in expected_meta.items():
        assert first_entry["meta_data"][key] == expected_value
|
||||
Reference in New Issue
Block a user