[Bug fix] Fix typos, static methods and other sanity improvements in the package (#1129)

This commit is contained in:
Sandra Serrano
2024-01-08 19:47:46 +01:00
committed by GitHub
parent 62c0c52e31
commit 2496ed133e
41 changed files with 133 additions and 103 deletions

View File

@@ -21,7 +21,7 @@ class WebPageLoader(BaseLoader):
_session = requests.Session()
def load_data(self, url):
"""Load data from a web page using a shared requests session."""
"""Load data from a web page using a shared requests' session."""
response = self._session.get(url, timeout=30)
response.raise_for_status()
data = response.content
@@ -40,7 +40,8 @@ class WebPageLoader(BaseLoader):
],
}
def _get_clean_content(self, html, url) -> str:
@staticmethod
def _get_clean_content(html, url) -> str:
soup = BeautifulSoup(html, "html.parser")
original_size = len(str(soup.get_text()))
@@ -60,8 +61,8 @@ class WebPageLoader(BaseLoader):
tag.decompose()
ids_to_exclude = ["sidebar", "main-navigation", "menu-main-menu"]
for id in ids_to_exclude:
tags = soup.find_all(id=id)
for id_ in ids_to_exclude:
tags = soup.find_all(id=id_)
for tag in tags:
tag.decompose()