Feat/serialize deserialize (#508)

Co-authored-by: Taranjeet Singh <reachtotj@gmail.com>
This commit is contained in:
cachho
2023-09-03 21:50:18 +02:00
committed by GitHub
parent 2aa25a5169
commit 0d4ad07d7b
42 changed files with 345 additions and 8 deletions

View File

@@ -1,9 +1,10 @@
import hashlib
from embedchain.helper_classes.json_serializable import JSONSerializable
from embedchain.models.data_type import DataType
-class BaseChunker:
+class BaseChunker(JSONSerializable):
def __init__(self, text_splitter):
"""Initialize the chunker."""
self.text_splitter = text_splitter

View File

@@ -4,8 +4,10 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
from embedchain.chunkers.base_chunker import BaseChunker
from embedchain.config.AddConfig import ChunkerConfig
from embedchain.helper_classes.json_serializable import register_deserializable
@register_deserializable
class DocsSiteChunker(BaseChunker):
"""Chunker for code docs site."""

View File

@@ -4,8 +4,10 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
from embedchain.chunkers.base_chunker import BaseChunker
from embedchain.config.AddConfig import ChunkerConfig
from embedchain.helper_classes.json_serializable import register_deserializable
@register_deserializable
class DocxFileChunker(BaseChunker):
"""Chunker for .docx file."""

View File

@@ -4,8 +4,10 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
from embedchain.chunkers.base_chunker import BaseChunker
from embedchain.config.AddConfig import ChunkerConfig
from embedchain.helper_classes.json_serializable import register_deserializable
@register_deserializable
class NotionChunker(BaseChunker):
"""Chunker for notion."""

View File

@@ -4,8 +4,10 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
from embedchain.chunkers.base_chunker import BaseChunker
from embedchain.config.AddConfig import ChunkerConfig
from embedchain.helper_classes.json_serializable import register_deserializable
@register_deserializable
class PdfFileChunker(BaseChunker):
"""Chunker for PDF file."""

View File

@@ -4,8 +4,10 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
from embedchain.chunkers.base_chunker import BaseChunker
from embedchain.config.AddConfig import ChunkerConfig
from embedchain.helper_classes.json_serializable import register_deserializable
@register_deserializable
class QnaPairChunker(BaseChunker):
"""Chunker for QnA pair."""

View File

@@ -4,8 +4,10 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
from embedchain.chunkers.base_chunker import BaseChunker
from embedchain.config.AddConfig import ChunkerConfig
from embedchain.helper_classes.json_serializable import register_deserializable
@register_deserializable
class TextChunker(BaseChunker):
"""Chunker for text."""

View File

@@ -4,8 +4,10 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
from embedchain.chunkers.base_chunker import BaseChunker
from embedchain.config.AddConfig import ChunkerConfig
from embedchain.helper_classes.json_serializable import register_deserializable
@register_deserializable
class WebPageChunker(BaseChunker):
"""Chunker for web page."""

View File

@@ -4,8 +4,10 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
from embedchain.chunkers.base_chunker import BaseChunker
from embedchain.config.AddConfig import ChunkerConfig
from embedchain.helper_classes.json_serializable import register_deserializable
@register_deserializable
class YoutubeVideoChunker(BaseChunker):
"""Chunker for Youtube video."""