chore: load chunker from config (#270)

This commit is contained in:
cachho
2023-07-17 17:54:35 +02:00
committed by GitHub
parent 07ba65d88d
commit 9c58627372
10 changed files with 48 additions and 69 deletions

View File

@@ -5,18 +5,16 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
from embedchain.chunkers.base_chunker import BaseChunker
from embedchain.config.AddConfig import ChunkerConfig
# Default keyword arguments for RecursiveCharacterTextSplitter: TextChunker
# falls back to this dict when no config is passed and unpacks it with `**`.
# Lengths are measured with the built-in `len`.
# NOTE(review): this looks like the removed side of the diff (the +/- markers
# are missing from this view) — confirm against the repository.
TEXT_SPLITTER_CHUNK_PARAMS = {
"chunk_size": 300,
"chunk_overlap": 0,
"length_function": len,
}
# NOTE(review): this hunk is shown without +/- markers or indentation, so the
# body of __init__ below appears to interleave the pre-change and post-change
# implementations — confirm against the repository before relying on it.
class TextChunker(BaseChunker):
"""Chunker for text."""
# Builds a RecursiveCharacterTextSplitter and hands it to BaseChunker.__init__.
# `config` is optional; when it is None a default configuration is substituted.
def __init__(self, config: Optional[ChunkerConfig] = None):
if config is None:
# Presumably the pre-change fallback: the module-level dict of splitter kwargs.
config = TEXT_SPLITTER_CHUNK_PARAMS
# Presumably the pre-change construction: unpacks the config as keyword arguments.
text_splitter = RecursiveCharacterTextSplitter(**config)
# Presumably the post-change fallback: the same defaults (300 / 0 / len),
# expressed as a ChunkerConfig object instead of a plain dict.
config = ChunkerConfig(chunk_size=300, chunk_overlap=0, length_function=len)
# Presumably the post-change construction: reads each setting as an attribute
# of the config and passes it explicitly rather than unpacking with `**`.
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=config.chunk_size,
chunk_overlap=config.chunk_overlap,
length_function=config.length_function,
)
super().__init__(text_splitter)