Files
t6_mem0/embedchain/chunkers/pdf_file.py
Taranjeet Singh 4329caa17c Chunkers: Refactor each chunker & add base class
Adds a base chunker from which any chunker can inherit.
Existing chunkers are refactored to inherit from this base
chunker.
2023-06-20 16:30:23 +05:30

16 lines
421 B
Python

from embedchain.chunkers.base_chunker import BaseChunker
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Keyword arguments unpacked into RecursiveCharacterTextSplitter when a
# PdfFileChunker is constructed. Sizes are measured with ``length_function``;
# since that is the builtin ``len``, they are character counts when the
# splitter is fed ``str`` input.
TEXT_SPLITTER_CHUNK_PARAMS = dict(
    chunk_size=1000,  # upper bound on the measured length of one chunk
    chunk_overlap=0,  # no content is repeated between neighbouring chunks
    length_function=len,
)
class PdfFileChunker(BaseChunker):
    """Chunker for the PDF-file data type.

    All chunking behaviour is inherited from ``BaseChunker``; this subclass
    only decides which text splitter the base class is given.
    """

    def __init__(self):
        """Create the chunker with a recursive character text splitter.

        The splitter is configured from the module-level
        ``TEXT_SPLITTER_CHUNK_PARAMS`` and handed to ``BaseChunker.__init__``.
        """
        super().__init__(
            RecursiveCharacterTextSplitter(**TEXT_SPLITTER_CHUNK_PARAMS)
        )