Feat/serialize deserialize (#508)
Co-authored-by: Taranjeet Singh <reachtotj@gmail.com>
This commit is contained in:
@@ -1,4 +1,7 @@
|
||||
class BaseLoader:
|
||||
from embedchain.helper_classes.json_serializable import JSONSerializable
|
||||
|
||||
|
||||
class BaseLoader(JSONSerializable):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
|
||||
@@ -4,9 +4,11 @@ from urllib.parse import urljoin, urlparse
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from embedchain.helper_classes.json_serializable import register_deserializable
|
||||
from embedchain.loaders.base_loader import BaseLoader
|
||||
|
||||
|
||||
@register_deserializable
|
||||
class DocsSiteLoader(BaseLoader):
|
||||
def __init__(self):
|
||||
self.visited_links = set()
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
from langchain.document_loaders import Docx2txtLoader
|
||||
|
||||
from embedchain.helper_classes.json_serializable import register_deserializable
|
||||
from embedchain.loaders.base_loader import BaseLoader
|
||||
|
||||
|
||||
@register_deserializable
|
||||
class DocxFileLoader(BaseLoader):
|
||||
def load_data(self, url):
|
||||
"""Load data from a .docx file."""
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
from embedchain.helper_classes.json_serializable import register_deserializable
|
||||
from embedchain.loaders.base_loader import BaseLoader
|
||||
|
||||
|
||||
@register_deserializable
|
||||
class LocalQnaPairLoader(BaseLoader):
|
||||
def load_data(self, content):
|
||||
"""Load data from a local QnA pair."""
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
from embedchain.helper_classes.json_serializable import register_deserializable
|
||||
from embedchain.loaders.base_loader import BaseLoader
|
||||
|
||||
|
||||
@register_deserializable
|
||||
class LocalTextLoader(BaseLoader):
|
||||
def load_data(self, content):
|
||||
"""Load data from a local text file."""
|
||||
|
||||
@@ -7,10 +7,12 @@ except ImportError:
|
||||
raise ImportError("Notion requires extra dependencies. Install with `pip install embedchain[community]`") from None
|
||||
|
||||
|
||||
from embedchain.helper_classes.json_serializable import register_deserializable
|
||||
from embedchain.loaders.base_loader import BaseLoader
|
||||
from embedchain.utils import clean_string
|
||||
|
||||
|
||||
@register_deserializable
|
||||
class NotionLoader(BaseLoader):
|
||||
def load_data(self, source):
|
||||
"""Load data from a Notion source."""
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
from langchain.document_loaders import PyPDFLoader
|
||||
|
||||
from embedchain.helper_classes.json_serializable import register_deserializable
|
||||
from embedchain.loaders.base_loader import BaseLoader
|
||||
from embedchain.utils import clean_string
|
||||
|
||||
|
||||
@register_deserializable
|
||||
class PdfFileLoader(BaseLoader):
|
||||
def load_data(self, url):
|
||||
"""Load data from a PDF file."""
|
||||
|
||||
@@ -4,11 +4,13 @@ import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4.builder import ParserRejectedMarkup
|
||||
|
||||
from embedchain.helper_classes.json_serializable import register_deserializable
|
||||
from embedchain.loaders.base_loader import BaseLoader
|
||||
from embedchain.loaders.web_page import WebPageLoader
|
||||
from embedchain.utils import is_readable
|
||||
|
||||
|
||||
@register_deserializable
|
||||
class SitemapLoader(BaseLoader):
|
||||
def load_data(self, sitemap_url):
|
||||
"""
|
||||
|
||||
@@ -3,10 +3,12 @@ import logging
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from embedchain.helper_classes.json_serializable import register_deserializable
|
||||
from embedchain.loaders.base_loader import BaseLoader
|
||||
from embedchain.utils import clean_string
|
||||
|
||||
|
||||
@register_deserializable
|
||||
class WebPageLoader(BaseLoader):
|
||||
def load_data(self, url):
|
||||
"""Load data from a web page."""
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
from langchain.document_loaders import YoutubeLoader
|
||||
|
||||
from embedchain.helper_classes.json_serializable import register_deserializable
|
||||
from embedchain.loaders.base_loader import BaseLoader
|
||||
from embedchain.utils import clean_string
|
||||
|
||||
|
||||
@register_deserializable
|
||||
class YoutubeVideoLoader(BaseLoader):
|
||||
def load_data(self, url):
|
||||
"""Load data from a Youtube video."""
|
||||
|
||||
Reference in New Issue
Block a user