[Bug fix] Fix typos, static methods and other sanity improvements in the package (#1129)
This commit is contained in:
@@ -21,7 +21,7 @@ class WebPageLoader(BaseLoader):
|
||||
_session = requests.Session()
|
||||
|
||||
def load_data(self, url):
|
||||
"""Load data from a web page using a shared requests session."""
|
||||
"""Load data from a web page using a shared requests' session."""
|
||||
response = self._session.get(url, timeout=30)
|
||||
response.raise_for_status()
|
||||
data = response.content
|
||||
@@ -40,7 +40,8 @@ class WebPageLoader(BaseLoader):
|
||||
],
|
||||
}
|
||||
|
||||
def _get_clean_content(self, html, url) -> str:
|
||||
@staticmethod
|
||||
def _get_clean_content(html, url) -> str:
|
||||
soup = BeautifulSoup(html, "html.parser")
|
||||
original_size = len(str(soup.get_text()))
|
||||
|
||||
@@ -60,8 +61,8 @@ class WebPageLoader(BaseLoader):
|
||||
tag.decompose()
|
||||
|
||||
ids_to_exclude = ["sidebar", "main-navigation", "menu-main-menu"]
|
||||
for id in ids_to_exclude:
|
||||
tags = soup.find_all(id=id)
|
||||
for id_ in ids_to_exclude:
|
||||
tags = soup.find_all(id=id_)
|
||||
for tag in tags:
|
||||
tag.decompose()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user