feat: change doc_file to docx and update readme (#157)

This commit is contained in:
Sahil Kumar Yadav
2023-07-07 16:18:05 +05:30
committed by GitHub
parent 51adc5c886
commit 0bb3d0afe9
5 changed files with 21 additions and 18 deletions

View File

@@ -4,13 +4,13 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
TEXT_SPLITTER_CHUNK_PARAMS = {
-"chunk_size": 500,
+"chunk_size": 1000,
"chunk_overlap": 0,
"length_function": len,
}
-class DocFileChunker(BaseChunker):
+class DocxFileChunker(BaseChunker):
def __init__(self):
text_splitter = RecursiveCharacterTextSplitter(**TEXT_SPLITTER_CHUNK_PARAMS)
super().__init__(text_splitter)