[improvement] update web page default chunk size to 2000 (#1005)
Co-authored-by: Deven Patel <deven298@yahoo.com>
This commit is contained in:
@@ -13,7 +13,7 @@ class WebPageChunker(BaseChunker):
|
||||
|
||||
def __init__(self, config: Optional[ChunkerConfig] = None):
|
||||
if config is None:
|
||||
- config = ChunkerConfig(chunk_size=500, chunk_overlap=0, length_function=len)
|
||||
+ config = ChunkerConfig(chunk_size=2000, chunk_overlap=0, length_function=len)
|
||||
text_splitter = RecursiveCharacterTextSplitter(
|
||||
chunk_size=config.chunk_size,
|
||||
chunk_overlap=config.chunk_overlap,
|
||||
|
||||
@@ -31,7 +31,7 @@ chunker_common_config = {
|
||||
QnaPairChunker: {"chunk_size": 300, "chunk_overlap": 0, "length_function": len},
|
||||
TableChunker: {"chunk_size": 300, "chunk_overlap": 0, "length_function": len},
|
||||
SitemapChunker: {"chunk_size": 500, "chunk_overlap": 0, "length_function": len},
|
||||
- WebPageChunker: {"chunk_size": 500, "chunk_overlap": 0, "length_function": len},
|
||||
+ WebPageChunker: {"chunk_size": 2000, "chunk_overlap": 0, "length_function": len},
|
||||
XmlChunker: {"chunk_size": 500, "chunk_overlap": 50, "length_function": len},
|
||||
YoutubeVideoChunker: {"chunk_size": 2000, "chunk_overlap": 0, "length_function": len},
|
||||
JSONChunker: {"chunk_size": 1000, "chunk_overlap": 0, "length_function": len},
|
||||
|
||||
Reference in New Issue
Block a user