[improvement] update web page default chunk size to 2000 (#1005)

Co-authored-by: Deven Patel <deven298@yahoo.com>
This commit is contained in:
Deven Patel
2023-12-12 23:06:13 -08:00
committed by GitHub
parent 7910cee259
commit ae6f866901
2 changed files with 2 additions and 2 deletions

View File

@@ -13,7 +13,7 @@ class WebPageChunker(BaseChunker):
def __init__(self, config: Optional[ChunkerConfig] = None):
if config is None:
- config = ChunkerConfig(chunk_size=500, chunk_overlap=0, length_function=len)
+ config = ChunkerConfig(chunk_size=2000, chunk_overlap=0, length_function=len)
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=config.chunk_size,
chunk_overlap=config.chunk_overlap,

View File

@@ -31,7 +31,7 @@ chunker_common_config = {
QnaPairChunker: {"chunk_size": 300, "chunk_overlap": 0, "length_function": len},
TableChunker: {"chunk_size": 300, "chunk_overlap": 0, "length_function": len},
SitemapChunker: {"chunk_size": 500, "chunk_overlap": 0, "length_function": len},
- WebPageChunker: {"chunk_size": 500, "chunk_overlap": 0, "length_function": len},
+ WebPageChunker: {"chunk_size": 2000, "chunk_overlap": 0, "length_function": len},
XmlChunker: {"chunk_size": 500, "chunk_overlap": 50, "length_function": len},
YoutubeVideoChunker: {"chunk_size": 2000, "chunk_overlap": 0, "length_function": len},
JSONChunker: {"chunk_size": 1000, "chunk_overlap": 0, "length_function": len},