21 lines
748 B
Python
21 lines
748 B
Python
from typing import Optional
|
|
|
|
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
|
|
from embedchain.chunkers.base_chunker import BaseChunker
|
|
from embedchain.config.AddConfig import ChunkerConfig
|
|
|
|
|
|
class TableChunker(BaseChunker):
    """Chunker for tables, for instance csv, google sheets or databases."""

    def __init__(self, config: Optional[ChunkerConfig] = None):
        """Build the text splitter for this chunker and pass it to the base class.

        Args:
            config: Chunking settings. When ``None``, a ``ChunkerConfig`` with
                ``chunk_size=300``, ``chunk_overlap=0`` and the built-in
                ``len`` as ``length_function`` is used instead.
        """
        # Fall back to the table-specific defaults only when the caller
        # supplied no configuration at all.
        settings = (
            config
            if config is not None
            else ChunkerConfig(chunk_size=300, chunk_overlap=0, length_function=len)
        )
        # Forward exactly the three settings the splitter is constructed with.
        splitter_options = {
            "chunk_size": settings.chunk_size,
            "chunk_overlap": settings.chunk_overlap,
            "length_function": settings.length_function,
        }
        super().__init__(RecursiveCharacterTextSplitter(**splitter_options))
|