[Features] Add GitHub and YouTube Channel loaders (#957)

Co-authored-by: Deven Patel <deven298@yahoo.com>
Co-authored-by: Deshraj Yadav <deshrajdry@gmail.com>
This commit is contained in:
Deven Patel
2023-11-15 19:17:42 -08:00
committed by GitHub
parent 3fa7db8420
commit 07fb6bee54
9 changed files with 303 additions and 5 deletions

View File

@@ -1,5 +1,8 @@
import hashlib
from langchain.text_splitter import RecursiveCharacterTextSplitter
from embedchain.config.add_config import ChunkerConfig
from embedchain.helper.json_serializable import JSONSerializable
from embedchain.models.data_type import DataType
@@ -7,7 +10,15 @@ from embedchain.models.data_type import DataType
class BaseChunker(JSONSerializable):
def __init__(self, text_splitter):
    """Initialize the chunker.

    Args:
        text_splitter: Splitter object stored on the instance as
            ``self.text_splitter``. When ``None``, a
            ``RecursiveCharacterTextSplitter`` is created from a default
            ``ChunkerConfig`` (``chunk_size=1000``, ``chunk_overlap=0``,
            ``length_function=len``) and stored instead.
    """
    if text_splitter is None:
        # No splitter supplied: build one from the default chunking config.
        config = ChunkerConfig(chunk_size=1000, chunk_overlap=0, length_function=len)
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=config.chunk_size,
            chunk_overlap=config.chunk_overlap,
            length_function=config.length_function,
        )
    # Assign exactly once; the earlier unconditional assignment was a dead
    # store that both branches immediately overwrote.
    self.text_splitter = text_splitter
    # The data type starts out unset.
    self.data_type = None
def create_chunks(self, loader, src, app_id=None):