Feature: Add support for loading docs website (#293)

This commit is contained in:
Deshraj Yadav
2023-07-16 22:22:52 -07:00
committed by GitHub
parent d5e40e1853
commit a548863a09
10 changed files with 173 additions and 86 deletions

View File

@@ -0,0 +1,22 @@
from typing import Optional
from langchain.text_splitter import RecursiveCharacterTextSplitter
from embedchain.chunkers.base_chunker import BaseChunker
from embedchain.config.AddConfig import ChunkerConfig
# Default keyword arguments passed to RecursiveCharacterTextSplitter when
# DocsSiteChunker is constructed without an explicit config. Lengths are
# measured with the built-in ``len``.
TEXT_SPLITTER_CHUNK_PARAMS = dict(
    chunk_size=500,
    chunk_overlap=50,
    length_function=len,
)
class DocsSiteChunker(BaseChunker):
    """Chunker for code docs site.

    Wraps a ``RecursiveCharacterTextSplitter`` and passes it to
    ``BaseChunker`` as the text splitter.
    """

    def __init__(self, config: Optional[ChunkerConfig] = None):
        """Build the text splitter and initialise the base chunker.

        Args:
            config: Keyword arguments for ``RecursiveCharacterTextSplitter``.
                When ``None``, ``TEXT_SPLITTER_CHUNK_PARAMS`` is used.

        NOTE(review): ``config`` is unpacked with ``**``, so it has to behave
        like a mapping. Confirm that ``ChunkerConfig`` supports this.
        """
        # Fall back to the module-level defaults only when nothing was passed.
        splitter_kwargs = TEXT_SPLITTER_CHUNK_PARAMS if config is None else config
        splitter = RecursiveCharacterTextSplitter(**splitter_kwargs)
        super().__init__(splitter)