Feat/serialize deserialize (#508)
Co-authored-by: Taranjeet Singh <reachtotj@gmail.com>
This commit is contained in:
@@ -1,9 +1,10 @@
|
||||
import hashlib
|
||||
|
||||
from embedchain.helper_classes.json_serializable import JSONSerializable
|
||||
from embedchain.models.data_type import DataType
|
||||
|
||||
|
||||
class BaseChunker:
|
||||
class BaseChunker(JSONSerializable):
|
||||
def __init__(self, text_splitter):
|
||||
"""Initialize the chunker."""
|
||||
self.text_splitter = text_splitter
|
||||
|
||||
@@ -4,8 +4,10 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
|
||||
from embedchain.chunkers.base_chunker import BaseChunker
|
||||
from embedchain.config.AddConfig import ChunkerConfig
|
||||
from embedchain.helper_classes.json_serializable import register_deserializable
|
||||
|
||||
|
||||
@register_deserializable
|
||||
class DocsSiteChunker(BaseChunker):
|
||||
"""Chunker for code docs site."""
|
||||
|
||||
|
||||
@@ -4,8 +4,10 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
|
||||
from embedchain.chunkers.base_chunker import BaseChunker
|
||||
from embedchain.config.AddConfig import ChunkerConfig
|
||||
from embedchain.helper_classes.json_serializable import register_deserializable
|
||||
|
||||
|
||||
@register_deserializable
|
||||
class DocxFileChunker(BaseChunker):
|
||||
"""Chunker for .docx file."""
|
||||
|
||||
|
||||
@@ -4,8 +4,10 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
|
||||
from embedchain.chunkers.base_chunker import BaseChunker
|
||||
from embedchain.config.AddConfig import ChunkerConfig
|
||||
from embedchain.helper_classes.json_serializable import register_deserializable
|
||||
|
||||
|
||||
@register_deserializable
|
||||
class NotionChunker(BaseChunker):
|
||||
"""Chunker for notion."""
|
||||
|
||||
|
||||
@@ -4,8 +4,10 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
|
||||
from embedchain.chunkers.base_chunker import BaseChunker
|
||||
from embedchain.config.AddConfig import ChunkerConfig
|
||||
from embedchain.helper_classes.json_serializable import register_deserializable
|
||||
|
||||
|
||||
@register_deserializable
|
||||
class PdfFileChunker(BaseChunker):
|
||||
"""Chunker for PDF file."""
|
||||
|
||||
|
||||
@@ -4,8 +4,10 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
|
||||
from embedchain.chunkers.base_chunker import BaseChunker
|
||||
from embedchain.config.AddConfig import ChunkerConfig
|
||||
from embedchain.helper_classes.json_serializable import register_deserializable
|
||||
|
||||
|
||||
@register_deserializable
|
||||
class QnaPairChunker(BaseChunker):
|
||||
"""Chunker for QnA pair."""
|
||||
|
||||
|
||||
@@ -4,8 +4,10 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
|
||||
from embedchain.chunkers.base_chunker import BaseChunker
|
||||
from embedchain.config.AddConfig import ChunkerConfig
|
||||
from embedchain.helper_classes.json_serializable import register_deserializable
|
||||
|
||||
|
||||
@register_deserializable
|
||||
class TextChunker(BaseChunker):
|
||||
"""Chunker for text."""
|
||||
|
||||
|
||||
@@ -4,8 +4,10 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
|
||||
from embedchain.chunkers.base_chunker import BaseChunker
|
||||
from embedchain.config.AddConfig import ChunkerConfig
|
||||
from embedchain.helper_classes.json_serializable import register_deserializable
|
||||
|
||||
|
||||
@register_deserializable
|
||||
class WebPageChunker(BaseChunker):
|
||||
"""Chunker for web page."""
|
||||
|
||||
|
||||
@@ -4,8 +4,10 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
|
||||
from embedchain.chunkers.base_chunker import BaseChunker
|
||||
from embedchain.config.AddConfig import ChunkerConfig
|
||||
from embedchain.helper_classes.json_serializable import register_deserializable
|
||||
|
||||
|
||||
@register_deserializable
|
||||
class YoutubeVideoChunker(BaseChunker):
|
||||
"""Chunker for Youtube video."""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user