feat: changed doc_file to docx and update readme (#157)
This commit is contained in:
committed by GitHub
parent 51adc5c886
commit 0bb3d0afe9
@@ -4,13 +4,13 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
 
 
 TEXT_SPLITTER_CHUNK_PARAMS = {
-    "chunk_size": 500,
+    "chunk_size": 1000,
     "chunk_overlap": 0,
     "length_function": len,
 }
 
 
-class DocFileChunker(BaseChunker):
+class DocxFileChunker(BaseChunker):
     def __init__(self):
         text_splitter = RecursiveCharacterTextSplitter(**TEXT_SPLITTER_CHUNK_PARAMS)
         super().__init__(text_splitter)
Reference in New Issue
Block a user