[Feature] Improve github and youtube channel loader (#966)

Co-authored-by: Deven Patel <deven298@yahoo.com>
This commit is contained in:
Deshraj Yadav
2023-11-17 18:25:14 -08:00
committed by GitHub
parent 51df00729e
commit 9fcf2130b5
13 changed files with 117 additions and 268 deletions

View File

@@ -0,0 +1,22 @@
from typing import Optional
from langchain.text_splitter import RecursiveCharacterTextSplitter
from embedchain.chunkers.base_chunker import BaseChunker
from embedchain.config.add_config import ChunkerConfig
from embedchain.helper.json_serializable import register_deserializable
@register_deserializable
class CommonChunker(BaseChunker):
"""Common chunker for all loaders."""
def __init__(self, config: Optional[ChunkerConfig] = None):
if config is None:
config = ChunkerConfig(chunk_size=1000, chunk_overlap=0, length_function=len)
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=config.chunk_size,
chunk_overlap=config.chunk_overlap,
length_function=config.length_function,
)
super().__init__(text_splitter)