[improvement] update web page default chunk size to 2000 (#1005)
Co-authored-by: Deven Patel <deven298@yahoo.com>
This commit is contained in:
@@ -13,7 +13,7 @@ class WebPageChunker(BaseChunker):
|
|||||||
|
|
||||||
def __init__(self, config: Optional[ChunkerConfig] = None):
|
def __init__(self, config: Optional[ChunkerConfig] = None):
|
||||||
if config is None:
|
if config is None:
|
||||||
config = ChunkerConfig(chunk_size=500, chunk_overlap=0, length_function=len)
|
config = ChunkerConfig(chunk_size=2000, chunk_overlap=0, length_function=len)
|
||||||
text_splitter = RecursiveCharacterTextSplitter(
|
text_splitter = RecursiveCharacterTextSplitter(
|
||||||
chunk_size=config.chunk_size,
|
chunk_size=config.chunk_size,
|
||||||
chunk_overlap=config.chunk_overlap,
|
chunk_overlap=config.chunk_overlap,
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ chunker_common_config = {
|
|||||||
QnaPairChunker: {"chunk_size": 300, "chunk_overlap": 0, "length_function": len},
|
QnaPairChunker: {"chunk_size": 300, "chunk_overlap": 0, "length_function": len},
|
||||||
TableChunker: {"chunk_size": 300, "chunk_overlap": 0, "length_function": len},
|
TableChunker: {"chunk_size": 300, "chunk_overlap": 0, "length_function": len},
|
||||||
SitemapChunker: {"chunk_size": 500, "chunk_overlap": 0, "length_function": len},
|
SitemapChunker: {"chunk_size": 500, "chunk_overlap": 0, "length_function": len},
|
||||||
WebPageChunker: {"chunk_size": 500, "chunk_overlap": 0, "length_function": len},
|
WebPageChunker: {"chunk_size": 2000, "chunk_overlap": 0, "length_function": len},
|
||||||
XmlChunker: {"chunk_size": 500, "chunk_overlap": 50, "length_function": len},
|
XmlChunker: {"chunk_size": 500, "chunk_overlap": 50, "length_function": len},
|
||||||
YoutubeVideoChunker: {"chunk_size": 2000, "chunk_overlap": 0, "length_function": len},
|
YoutubeVideoChunker: {"chunk_size": 2000, "chunk_overlap": 0, "length_function": len},
|
||||||
JSONChunker: {"chunk_size": 1000, "chunk_overlap": 0, "length_function": len},
|
JSONChunker: {"chunk_size": 1000, "chunk_overlap": 0, "length_function": len},
|
||||||
|
|||||||
Reference in New Issue
Block a user