feat: csv loader (#470)

Co-authored-by: Taranjeet Singh <reachtotj@gmail.com>
This commit is contained in:
cachho
2023-09-05 10:18:03 +02:00
committed by GitHub
parent 344e7470f6
commit bd595f84e8
7 changed files with 172 additions and 0 deletions

View File

@@ -0,0 +1,20 @@
from typing import Optional
from langchain.text_splitter import RecursiveCharacterTextSplitter
from embedchain.chunkers.base_chunker import BaseChunker
from embedchain.config.AddConfig import ChunkerConfig
class TableChunker(BaseChunker):
"""Chunker for tables, for instance csv, google sheets or databases."""
def __init__(self, config: Optional[ChunkerConfig] = None):
if config is None:
config = ChunkerConfig(chunk_size=300, chunk_overlap=0, length_function=len)
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=config.chunk_size,
chunk_overlap=config.chunk_overlap,
length_function=config.length_function,
)
super().__init__(text_splitter)