Feat/serialize deserialize (#508)

Co-authored-by: Taranjeet Singh <reachtotj@gmail.com>
This commit is contained in:
cachho
2023-09-03 21:50:18 +02:00
committed by GitHub
parent 2aa25a5169
commit 0d4ad07d7b
42 changed files with 345 additions and 8 deletions

View File

@@ -1,4 +1,7 @@
class BaseLoader:
from embedchain.helper_classes.json_serializable import JSONSerializable
class BaseLoader(JSONSerializable):
def __init__(self):
pass

View File

@@ -4,9 +4,11 @@ from urllib.parse import urljoin, urlparse
import requests
from bs4 import BeautifulSoup
from embedchain.helper_classes.json_serializable import register_deserializable
from embedchain.loaders.base_loader import BaseLoader
@register_deserializable
class DocsSiteLoader(BaseLoader):
def __init__(self):
self.visited_links = set()

View File

@@ -1,8 +1,10 @@
from langchain.document_loaders import Docx2txtLoader
from embedchain.helper_classes.json_serializable import register_deserializable
from embedchain.loaders.base_loader import BaseLoader
@register_deserializable
class DocxFileLoader(BaseLoader):
def load_data(self, url):
"""Load data from a .docx file."""

View File

@@ -1,6 +1,8 @@
from embedchain.helper_classes.json_serializable import register_deserializable
from embedchain.loaders.base_loader import BaseLoader
@register_deserializable
class LocalQnaPairLoader(BaseLoader):
def load_data(self, content):
"""Load data from a local QnA pair."""

View File

@@ -1,6 +1,8 @@
from embedchain.helper_classes.json_serializable import register_deserializable
from embedchain.loaders.base_loader import BaseLoader
@register_deserializable
class LocalTextLoader(BaseLoader):
def load_data(self, content):
"""Load data from a local text file."""

View File

@@ -7,10 +7,12 @@ except ImportError:
raise ImportError("Notion requires extra dependencies. Install with `pip install embedchain[community]`") from None
from embedchain.helper_classes.json_serializable import register_deserializable
from embedchain.loaders.base_loader import BaseLoader
from embedchain.utils import clean_string
@register_deserializable
class NotionLoader(BaseLoader):
def load_data(self, source):
"""Load data from a Notion source."""

View File

@@ -1,9 +1,11 @@
from langchain.document_loaders import PyPDFLoader
from embedchain.helper_classes.json_serializable import register_deserializable
from embedchain.loaders.base_loader import BaseLoader
from embedchain.utils import clean_string
@register_deserializable
class PdfFileLoader(BaseLoader):
def load_data(self, url):
"""Load data from a PDF file."""

View File

@@ -4,11 +4,13 @@ import requests
from bs4 import BeautifulSoup
from bs4.builder import ParserRejectedMarkup
from embedchain.helper_classes.json_serializable import register_deserializable
from embedchain.loaders.base_loader import BaseLoader
from embedchain.loaders.web_page import WebPageLoader
from embedchain.utils import is_readable
@register_deserializable
class SitemapLoader(BaseLoader):
def load_data(self, sitemap_url):
"""

View File

@@ -3,10 +3,12 @@ import logging
import requests
from bs4 import BeautifulSoup
from embedchain.helper_classes.json_serializable import register_deserializable
from embedchain.loaders.base_loader import BaseLoader
from embedchain.utils import clean_string
@register_deserializable
class WebPageLoader(BaseLoader):
def load_data(self, url):
"""Load data from a web page."""

View File

@@ -1,9 +1,11 @@
from langchain.document_loaders import YoutubeLoader
from embedchain.helper_classes.json_serializable import register_deserializable
from embedchain.loaders.base_loader import BaseLoader
from embedchain.utils import clean_string
@register_deserializable
class YoutubeVideoLoader(BaseLoader):
def load_data(self, url):
"""Load data from a Youtube video."""