Add functionality for fetching data from reference links in a web page (#1806)

This commit is contained in:
Vatsal Rathod
2024-10-15 07:26:35 -04:00
committed by GitHub
parent 721d765921
commit 20c3aee636
9 changed files with 86 additions and 8 deletions

View File

@@ -2,6 +2,7 @@ import hashlib
from unittest.mock import Mock, patch
import pytest
import requests
from embedchain.loaders.web_page import WebPageLoader
@@ -115,3 +116,33 @@ def test_get_clean_content_excludes_unnecessary_info(web_page_loader):
assert class_name not in content
assert len(content) > 0
def test_fetch_reference_links_success(web_page_loader):
    """Check the links extracted from a 200 response carrying three anchors.

    The page holds two absolute URLs (one http, one https) and one relative
    href. The test expects exactly the two absolute URLs, in the order they
    appear in the markup; the relative href is expected to be absent.
    """
    # The markup lines are deliberately flush-left: any indentation added
    # here would become part of the bytes payload.
    page_markup = b"""
<html>
<body>
<a href="http://example.com">Example</a>
<a href="https://another-example.com">Another Example</a>
<a href="/relative-link">Relative Link</a>
</body>
</html>
"""
    # Stand-in for a successful request, spec'd to requests.Response.
    ok_response = Mock(spec=requests.Response, status_code=200, content=page_markup)

    links = web_page_loader.fetch_reference_links(ok_response)

    assert links == ["http://example.com", "https://another-example.com"]
def test_fetch_reference_links_failure(web_page_loader):
    """Check that a 404 response with an empty body yields an empty list."""
    # Stand-in for a failed request: spec'd to requests.Response, 404 status,
    # and no payload.
    not_found = Mock(spec=requests.Response, status_code=404, content=b"")

    links = web_page_loader.fetch_reference_links(not_found)

    assert links == []