Added documentation (#219)
This commit is contained in:
@@ -3,6 +3,7 @@ import hashlib
|
||||
|
||||
class BaseChunker:
|
||||
def __init__(self, text_splitter):
|
||||
''' Initialize the chunker. '''
|
||||
self.text_splitter = text_splitter
|
||||
|
||||
def create_chunks(self, loader, src):
|
||||
|
||||
@@ -14,6 +14,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = {
|
||||
|
||||
|
||||
class DocxFileChunker(BaseChunker):
|
||||
''' Chunker for .docx file. '''
|
||||
def __init__(self, config: Optional[ChunkerConfig] = None):
|
||||
if config is None:
|
||||
config = TEXT_SPLITTER_CHUNK_PARAMS
|
||||
|
||||
@@ -13,6 +13,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = {
|
||||
|
||||
|
||||
class PdfFileChunker(BaseChunker):
|
||||
''' Chunker for PDF file. '''
|
||||
def __init__(self, config: Optional[ChunkerConfig] = None):
|
||||
if config is None:
|
||||
config = TEXT_SPLITTER_CHUNK_PARAMS
|
||||
|
||||
@@ -13,6 +13,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = {
|
||||
|
||||
|
||||
class QnaPairChunker(BaseChunker):
|
||||
''' Chunker for QnA pair. '''
|
||||
def __init__(self, config: Optional[ChunkerConfig] = None):
|
||||
if config is None:
|
||||
config = TEXT_SPLITTER_CHUNK_PARAMS
|
||||
|
||||
@@ -13,6 +13,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = {
|
||||
|
||||
|
||||
class TextChunker(BaseChunker):
|
||||
''' Chunker for text. '''
|
||||
def __init__(self, config: Optional[ChunkerConfig] = None):
|
||||
if config is None:
|
||||
config = TEXT_SPLITTER_CHUNK_PARAMS
|
||||
|
||||
@@ -13,6 +13,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = {
|
||||
|
||||
|
||||
class WebPageChunker(BaseChunker):
|
||||
''' Chunker for web page. '''
|
||||
def __init__(self, config: Optional[ChunkerConfig] = None):
|
||||
if config is None:
|
||||
config = TEXT_SPLITTER_CHUNK_PARAMS
|
||||
|
||||
@@ -13,6 +13,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = {
|
||||
|
||||
|
||||
class YoutubeVideoChunker(BaseChunker):
|
||||
''' Chunker for Youtube video. '''
|
||||
def __init__(self, config: Optional[ChunkerConfig] = None):
|
||||
if config is None:
|
||||
config = TEXT_SPLITTER_CHUNK_PARAMS
|
||||
|
||||
Reference in New Issue
Block a user