[Refactor] Improve logging package-wide (#1315)
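Every hunk in this commit applies the same two-step pattern: a module-level `logger = logging.getLogger(__name__)` is added after the imports, and each direct call on the root `logging` module is rerouted through that named logger. A minimal sketch of the before/after pattern (the `load_page` function here is illustrative, not taken from the diff):

    import logging

    # Named, module-level logger: records emitted here carry this module's
    # dotted path (e.g. "embedchain.loaders.web_page") as their logger name.
    logger = logging.getLogger(__name__)


    def load_page(link: str) -> None:
        # Before: logging.warning(...)  -- emitted via the root logger
        # After:  logger.warning(...)   -- emitted via this module's logger
        logger.warning(f"Page is not readable (too many invalid characters): {link}")

Named loggers propagate to the root handlers by default, so existing output keeps working; the gain is that every record now carries its originating module name and can be filtered per module.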
@@ -9,6 +9,8 @@ from embedchain.helpers.json_serializable import register_deserializable
 from embedchain.loaders.base_loader import BaseLoader
 from embedchain.utils.misc import is_readable

+logger = logging.getLogger(__name__)
+

 @register_deserializable
 class BeehiivLoader(BaseLoader):
@@ -90,9 +92,9 @@ class BeehiivLoader(BaseLoader):
                 if is_readable(data):
                     return data
                 else:
-                    logging.warning(f"Page is not readable (too many invalid characters): {link}")
+                    logger.warning(f"Page is not readable (too many invalid characters): {link}")
             except ParserRejectedMarkup as e:
-                logging.error(f"Failed to parse {link}: {e}")
+                logger.error(f"Failed to parse {link}: {e}")
             return None

         for link in links:
@@ -10,6 +10,8 @@ from embedchain.loaders.base_loader import BaseLoader
 from embedchain.loaders.text_file import TextFileLoader
 from embedchain.utils.misc import detect_datatype

+logger = logging.getLogger(__name__)
+

 @register_deserializable
 class DirectoryLoader(BaseLoader):
@@ -27,12 +29,12 @@ class DirectoryLoader(BaseLoader):
         if not directory_path.is_dir():
             raise ValueError(f"Invalid path: {path}")

-        logging.info(f"Loading data from directory: {path}")
+        logger.info(f"Loading data from directory: {path}")
         data_list = self._process_directory(directory_path)
         doc_id = hashlib.sha256((str(data_list) + str(directory_path)).encode()).hexdigest()

         for error in self.errors:
-            logging.warning(error)
+            logger.warning(error)

         return {"doc_id": doc_id, "data": data_list}
@@ -46,7 +48,7 @@ class DirectoryLoader(BaseLoader):
                 loader = self._predict_loader(file_path)
                 data_list.extend(loader.load_data(str(file_path))["data"])
             elif file_path.is_dir():
-                logging.info(f"Loading data from directory: {file_path}")
+                logger.info(f"Loading data from directory: {file_path}")
         return data_list

     def _predict_loader(self, file_path: Path) -> BaseLoader:
@@ -5,6 +5,8 @@ import os
 from embedchain.helpers.json_serializable import register_deserializable
 from embedchain.loaders.base_loader import BaseLoader

+logger = logging.getLogger(__name__)
+

 @register_deserializable
 class DiscordLoader(BaseLoader):
@@ -102,7 +104,7 @@ class DiscordLoader(BaseLoader):

         class DiscordClient(discord.Client):
             async def on_ready(self) -> None:
-                logging.info("Logged on as {0}!".format(self.user))
+                logger.info("Logged on as {0}!".format(self.user))
                 try:
                     channel = self.get_channel(int(channel_id))
                     if not isinstance(channel, discord.TextChannel):
@@ -121,7 +123,7 @@ class DiscordLoader(BaseLoader):
                         messages.append(DiscordLoader._format_message(thread_message))

                 except Exception as e:
-                    logging.error(e)
+                    logger.error(e)
                     await self.close()
                 finally:
                     await self.close()
@@ -8,6 +8,8 @@ import requests
 from embedchain.loaders.base_loader import BaseLoader
 from embedchain.utils.misc import clean_string

+logger = logging.getLogger(__name__)
+

 class DiscourseLoader(BaseLoader):
     def __init__(self, config: Optional[dict[str, Any]] = None):
@@ -35,7 +37,7 @@ class DiscourseLoader(BaseLoader):
         try:
             response.raise_for_status()
         except Exception as e:
-            logging.error(f"Failed to load post {post_id}: {e}")
+            logger.error(f"Failed to load post {post_id}: {e}")
             return
         response_data = response.json()
         post_contents = clean_string(response_data.get("raw"))
@@ -56,7 +58,7 @@ class DiscourseLoader(BaseLoader):
         self._check_query(query)
         data = []
         data_contents = []
-        logging.info(f"Searching data on discourse url: {self.domain}, for query: {query}")
+        logger.info(f"Searching data on discourse url: {self.domain}, for query: {query}")
         search_url = f"{self.domain}search.json?q={query}"
         response = requests.get(search_url)
         try:
@@ -15,6 +15,8 @@ except ImportError:
 from embedchain.helpers.json_serializable import register_deserializable
 from embedchain.loaders.base_loader import BaseLoader

+logger = logging.getLogger(__name__)
+

 @register_deserializable
 class DocsSiteLoader(BaseLoader):
@@ -28,7 +30,7 @@ class DocsSiteLoader(BaseLoader):

         response = requests.get(url)
         if response.status_code != 200:
-            logging.info(f"Failed to fetch the website: {response.status_code}")
+            logger.info(f"Failed to fetch the website: {response.status_code}")
             return

         soup = BeautifulSoup(response.text, "html.parser")
@@ -53,7 +55,7 @@ class DocsSiteLoader(BaseLoader):
     def _load_data_from_url(url: str) -> list:
         response = requests.get(url)
         if response.status_code != 200:
-            logging.info(f"Failed to fetch the website: {response.status_code}")
+            logger.info(f"Failed to fetch the website: {response.status_code}")
             return []

         soup = BeautifulSoup(response.content, "html.parser")
@@ -22,6 +22,8 @@ except ImportError:
 from embedchain.loaders.base_loader import BaseLoader
 from embedchain.utils.misc import clean_string

+logger = logging.getLogger(__name__)
+

 class GmailReader:
     SCOPES = ["https://www.googleapis.com/auth/gmail.readonly"]
@@ -114,7 +116,7 @@ class GmailLoader(BaseLoader):
     def load_data(self, query: str):
         reader = GmailReader(query=query)
         emails = reader.load_emails()
-        logging.info(f"Gmail Loader: {len(emails)} emails found for query '{query}'")
+        logger.info(f"Gmail Loader: {len(emails)} emails found for query '{query}'")

         data = []
         for email in emails:
@@ -5,6 +5,8 @@ from typing import Any, Optional
 from embedchain.loaders.base_loader import BaseLoader
 from embedchain.utils.misc import clean_string

+logger = logging.getLogger(__name__)
+

 class MySQLLoader(BaseLoader):
     def __init__(self, config: Optional[dict[str, Any]]):
@@ -32,7 +34,7 @@ class MySQLLoader(BaseLoader):
             self.connection = sqlconnector.connection.MySQLConnection(**config)
             self.cursor = self.connection.cursor()
         except (sqlconnector.Error, IOError) as err:
-            logging.info(f"Connection failed: {err}")
+            logger.info(f"Connection failed: {err}")
             raise ValueError(
                 f"Unable to connect with the given config: {config}.",
                 "Please provide the correct configuration to load data from you MySQL DB. \
@@ -9,6 +9,8 @@ from embedchain.helpers.json_serializable import register_deserializable
 from embedchain.loaders.base_loader import BaseLoader
 from embedchain.utils.misc import clean_string

+logger = logging.getLogger(__name__)
+

 class NotionDocument:
     """
@@ -98,7 +100,7 @@ class NotionLoader(BaseLoader):

         id = source[-32:]
         formatted_id = f"{id[:8]}-{id[8:12]}-{id[12:16]}-{id[16:20]}-{id[20:]}"
-        logging.debug(f"Extracted notion page id as: {formatted_id}")
+        logger.debug(f"Extracted notion page id as: {formatted_id}")

         integration_token = os.getenv("NOTION_INTEGRATION_TOKEN")
         reader = NotionPageLoader(integration_token=integration_token)
@@ -4,6 +4,8 @@ from typing import Any, Optional

 from embedchain.loaders.base_loader import BaseLoader

+logger = logging.getLogger(__name__)
+

 class PostgresLoader(BaseLoader):
     def __init__(self, config: Optional[dict[str, Any]] = None):
@@ -32,7 +34,7 @@ class PostgresLoader(BaseLoader):
             conn_params.append(f"{key}={value}")
         config_info = " ".join(conn_params)

-        logging.info(f"Connecting to postrgres sql: {config_info}")
+        logger.info(f"Connecting to postrgres sql: {config_info}")
         self.connection = psycopg.connect(conninfo=config_info)
         self.cursor = self.connection.cursor()
@@ -19,6 +19,8 @@ from embedchain.helpers.json_serializable import register_deserializable
 from embedchain.loaders.base_loader import BaseLoader
 from embedchain.loaders.web_page import WebPageLoader

+logger = logging.getLogger(__name__)
+

 @register_deserializable
 class SitemapLoader(BaseLoader):
@@ -41,7 +43,7 @@ class SitemapLoader(BaseLoader):
                 response.raise_for_status()
                 soup = BeautifulSoup(response.text, "xml")
             except requests.RequestException as e:
-                logging.error(f"Error fetching sitemap from URL: {e}")
+                logger.error(f"Error fetching sitemap from URL: {e}")
                 return
         elif os.path.isfile(sitemap_source):
             with open(sitemap_source, "r") as file:
@@ -60,7 +62,7 @@ class SitemapLoader(BaseLoader):
                 loader_data = web_page_loader.load_data(link)
                 return loader_data.get("data")
             except ParserRejectedMarkup as e:
-                logging.error(f"Failed to parse {link}: {e}")
+                logger.error(f"Failed to parse {link}: {e}")
                 return None

         with concurrent.futures.ThreadPoolExecutor() as executor:
@@ -72,6 +74,6 @@ class SitemapLoader(BaseLoader):
                 if data:
                     output.extend(data)
             except Exception as e:
-                logging.error(f"Error loading page {link}: {e}")
+                logger.error(f"Error loading page {link}: {e}")

         return {"doc_id": doc_id, "data": output}
@@ -11,6 +11,8 @@ from embedchain.utils.misc import clean_string

 SLACK_API_BASE_URL = "https://www.slack.com/api/"

+logger = logging.getLogger(__name__)
+

 class SlackLoader(BaseLoader):
     def __init__(self, config: Optional[dict[str, Any]] = None):
@@ -38,7 +40,7 @@ class SlackLoader(BaseLoader):
                 "SLACK_USER_TOKEN environment variables not provided. Check `https://docs.embedchain.ai/data-sources/slack` to learn more."  # noqa:E501
             )

-        logging.info(f"Creating Slack Loader with config: {config}")
+        logger.info(f"Creating Slack Loader with config: {config}")
         # get slack client config params
         slack_bot_token = os.getenv("SLACK_USER_TOKEN")
         ssl_cert = ssl.create_default_context(cafile=certifi.where())
@@ -54,7 +56,7 @@ class SlackLoader(BaseLoader):
             headers=headers,
             team_id=team_id,
         )
-        logging.info("Slack Loader setup successful!")
+        logger.info("Slack Loader setup successful!")

     @staticmethod
     def _check_query(query):
@@ -69,7 +71,7 @@ class SlackLoader(BaseLoader):
         data = []
         data_content = []

-        logging.info(f"Searching slack conversations for query: {query}")
+        logger.info(f"Searching slack conversations for query: {query}")
         results = self.client.search_messages(
             query=query,
             sort="timestamp",
@@ -79,7 +81,7 @@ class SlackLoader(BaseLoader):

         messages = results.get("messages")
         num_message = len(messages)
-        logging.info(f"Found {num_message} messages for query: {query}")
+        logger.info(f"Found {num_message} messages for query: {query}")

         matches = messages.get("matches", [])
         for message in matches:
@@ -107,7 +109,7 @@ class SlackLoader(BaseLoader):
                 "data": data,
             }
         except Exception as e:
-            logging.warning(f"Error in loading slack data: {e}")
+            logger.warning(f"Error in loading slack data: {e}")
             raise ValueError(
                 f"Error in loading slack data: {e}. Check `https://docs.embedchain.ai/data-sources/slack` to learn more."  # noqa:E501
             ) from e
@@ -9,6 +9,8 @@ from embedchain.helpers.json_serializable import register_deserializable
 from embedchain.loaders.base_loader import BaseLoader
 from embedchain.utils.misc import is_readable

+logger = logging.getLogger(__name__)
+

 @register_deserializable
 class SubstackLoader(BaseLoader):
@@ -90,9 +92,9 @@ class SubstackLoader(BaseLoader):
                 if is_readable(data):
                     return data
                 else:
-                    logging.warning(f"Page is not readable (too many invalid characters): {link}")
+                    logger.warning(f"Page is not readable (too many invalid characters): {link}")
             except ParserRejectedMarkup as e:
-                logging.error(f"Failed to parse {link}: {e}")
+                logger.error(f"Failed to parse {link}: {e}")
             return None

         for link in links:
@@ -14,6 +14,8 @@ from embedchain.helpers.json_serializable import register_deserializable
 from embedchain.loaders.base_loader import BaseLoader
 from embedchain.utils.misc import clean_string

+logger = logging.getLogger(__name__)
+

 @register_deserializable
 class WebPageLoader(BaseLoader):
@@ -87,7 +89,7 @@ class WebPageLoader(BaseLoader):

         cleaned_size = len(content)
         if original_size != 0:
-            logging.info(
+            logger.info(
                 f"[{url}] Cleaned page size: {cleaned_size} characters, down from {original_size} (shrunk: {original_size-cleaned_size} chars, {round((1-(cleaned_size/original_size)) * 100, 2)}%)"  # noqa:E501
             )
@@ -7,6 +7,8 @@ from tqdm import tqdm
 from embedchain.loaders.base_loader import BaseLoader
 from embedchain.loaders.youtube_video import YoutubeVideoLoader

+logger = logging.getLogger(__name__)
+

 class YoutubeChannelLoader(BaseLoader):
     """Loader for youtube channel."""
@@ -36,7 +38,7 @@ class YoutubeChannelLoader(BaseLoader):
                 videos = [entry["url"] for entry in info_dict["entries"]]
                 return videos
             except Exception:
-                logging.error(f"Failed to fetch youtube videos for channel: {channel_name}")
+                logger.error(f"Failed to fetch youtube videos for channel: {channel_name}")
                 return []

         def _load_yt_video(video_link):
@@ -45,12 +47,12 @@ class YoutubeChannelLoader(BaseLoader):
                 if each_load_data:
                     return each_load_data.get("data")
             except Exception as e:
-                logging.error(f"Failed to load youtube video {video_link}: {e}")
+                logger.error(f"Failed to load youtube video {video_link}: {e}")
             return None

         def _add_youtube_channel():
             video_links = _get_yt_video_links()
-            logging.info("Loading videos from youtube channel...")
+            logger.info("Loading videos from youtube channel...")
             with concurrent.futures.ThreadPoolExecutor() as executor:
                 # Submitting all tasks and storing the future object with the video link
                 future_to_video = {
@@ -67,7 +69,7 @@ class YoutubeChannelLoader(BaseLoader):
                             data.extend(results)
                             data_urls.extend([result.get("meta_data").get("url") for result in results])
                     except Exception as e:
-                        logging.error(f"Failed to process youtube video {video}: {e}")
+                        logger.error(f"Failed to process youtube video {video}: {e}")

         _add_youtube_channel()
         doc_id = hashlib.sha256((youtube_url + ", ".join(data_urls)).encode()).hexdigest()
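Because each loader now logs under its own dotted module name, consumers can tune verbosity per loader through the standard logging hierarchy instead of configuring only the root logger. A small usage sketch (the embedchain.loaders.web_page path matches the import shown in the sitemap hunk above; the level choices are illustrative):

    import logging

    # Global default: show INFO and above from every module.
    logging.basicConfig(level=logging.INFO)

    # Quiet a single noisy loader without touching the others.
    logging.getLogger("embedchain.loaders.web_page").setLevel(logging.ERROR)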