feat: add local text (#44)
This commit extends the "add_local" function. It adds support for taking raw text and indexing/embedding it.
This commit is contained in:
16
embedchain/chunkers/text.py
Normal file
16
embedchain/chunkers/text.py
Normal file
@@ -0,0 +1,16 @@
|
||||
from embedchain.chunkers.base_chunker import BaseChunker
|
||||
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
|
||||
|
||||
# Keyword arguments unpacked into RecursiveCharacterTextSplitter by TextChunker.
# Lengths are measured with the built-in ``len``; chunks do not overlap.
TEXT_SPLITTER_CHUNK_PARAMS = {
    "chunk_size": 300,
    "chunk_overlap": 0,
    "length_function": len,
}
|
||||
|
||||
|
||||
class TextChunker(BaseChunker):
    """Chunker for the ``text`` data type.

    Builds a ``RecursiveCharacterTextSplitter`` configured from
    ``TEXT_SPLITTER_CHUNK_PARAMS`` and hands it to ``BaseChunker``.
    """

    def __init__(self):
        """Create the text splitter and pass it to the base chunker."""
        text_splitter = RecursiveCharacterTextSplitter(**TEXT_SPLITTER_CHUNK_PARAMS)
        super().__init__(text_splitter)
|
||||
@@ -9,10 +9,12 @@ from embedchain.loaders.youtube_video import YoutubeVideoLoader
|
||||
from embedchain.loaders.pdf_file import PdfFileLoader
|
||||
from embedchain.loaders.web_page import WebPageLoader
|
||||
from embedchain.loaders.local_qna_pair import LocalQnaPairLoader
|
||||
from embedchain.loaders.local_text import LocalTextLoader
|
||||
from embedchain.chunkers.youtube_video import YoutubeVideoChunker
|
||||
from embedchain.chunkers.pdf_file import PdfFileChunker
|
||||
from embedchain.chunkers.web_page import WebPageChunker
|
||||
from embedchain.chunkers.qna_pair import QnaPairChunker
|
||||
from embedchain.chunkers.text import TextChunker
|
||||
from embedchain.vectordb.chroma_db import ChromaDB
|
||||
|
||||
load_dotenv()
|
||||
@@ -49,7 +51,8 @@ class EmbedChain:
|
||||
'youtube_video': YoutubeVideoLoader(),
|
||||
'pdf_file': PdfFileLoader(),
|
||||
'web_page': WebPageLoader(),
|
||||
'qna_pair': LocalQnaPairLoader()
|
||||
'qna_pair': LocalQnaPairLoader(),
|
||||
'text': LocalTextLoader(),
|
||||
}
|
||||
if data_type in loaders:
|
||||
return loaders[data_type]
|
||||
@@ -69,6 +72,7 @@ class EmbedChain:
|
||||
'pdf_file': PdfFileChunker(),
|
||||
'web_page': WebPageChunker(),
|
||||
'qna_pair': QnaPairChunker(),
|
||||
'text': TextChunker(),
|
||||
}
|
||||
if data_type in chunkers:
|
||||
return chunkers[data_type]
|
||||
|
||||
10
embedchain/loaders/local_text.py
Normal file
10
embedchain/loaders/local_text.py
Normal file
@@ -0,0 +1,10 @@
|
||||
class LocalTextLoader:
    """Loader that wraps caller-supplied text as a single document record."""

    def load_data(self, content):
        """Wrap ``content`` in the record structure returned by this loader.

        Args:
            content: The text to wrap. It is stored as-is, without copying
                or validation.

        Returns:
            A one-element list containing a dict with two keys:
            ``"content"`` (the value passed in) and ``"meta_data"`` (a dict
            whose ``"url"`` is the fixed string ``"local"``).
        """
        # A new metadata dict is built on every call, so records returned by
        # separate calls never share mutable state.
        meta_data = {
            "url": "local",
        }
        return [{
            "content": content,
            "meta_data": meta_data,
        }]
|
||||
Reference in New Issue
Block a user