feat: add local text (#44)

This commit extends the "add_local" function. It
adds support for taking raw text and indexing/embedding it.
This commit is contained in:
cachho
2023-06-25 19:43:41 +02:00
committed by GitHub
parent b9277c84c8
commit f5f5e7edd1
4 changed files with 40 additions and 1 deletions

View File

@@ -0,0 +1,16 @@
from embedchain.chunkers.base_chunker import BaseChunker
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Keyword arguments unpacked into RecursiveCharacterTextSplitter by TextChunker.
TEXT_SPLITTER_CHUNK_PARAMS = {
    # Size limit for each chunk, in the units reported by "length_function".
    "chunk_size": 300,
    # Amount of content shared between consecutive chunks (none here).
    "chunk_overlap": 0,
    # Built-in len, so sizes are measured with len() on the text.
    "length_function": len,
}
class TextChunker(BaseChunker):
    """Chunker that splits content with a RecursiveCharacterTextSplitter."""

    def __init__(self):
        """Create the splitter from TEXT_SPLITTER_CHUNK_PARAMS and hand it to BaseChunker."""
        super().__init__(
            RecursiveCharacterTextSplitter(**TEXT_SPLITTER_CHUNK_PARAMS)
        )

View File

@@ -9,10 +9,12 @@ from embedchain.loaders.youtube_video import YoutubeVideoLoader
from embedchain.loaders.pdf_file import PdfFileLoader
from embedchain.loaders.web_page import WebPageLoader
from embedchain.loaders.local_qna_pair import LocalQnaPairLoader
from embedchain.loaders.local_text import LocalTextLoader
from embedchain.chunkers.youtube_video import YoutubeVideoChunker
from embedchain.chunkers.pdf_file import PdfFileChunker
from embedchain.chunkers.web_page import WebPageChunker
from embedchain.chunkers.qna_pair import QnaPairChunker
from embedchain.chunkers.text import TextChunker
from embedchain.vectordb.chroma_db import ChromaDB
load_dotenv()
@@ -49,7 +51,8 @@ class EmbedChain:
'youtube_video': YoutubeVideoLoader(),
'pdf_file': PdfFileLoader(),
'web_page': WebPageLoader(),
'qna_pair': LocalQnaPairLoader()
'qna_pair': LocalQnaPairLoader(),
'text': LocalTextLoader(),
}
if data_type in loaders:
return loaders[data_type]
@@ -69,6 +72,7 @@ class EmbedChain:
'pdf_file': PdfFileChunker(),
'web_page': WebPageChunker(),
'qna_pair': QnaPairChunker(),
'text': TextChunker(),
}
if data_type in chunkers:
return chunkers[data_type]

View File

@@ -0,0 +1,10 @@
class LocalTextLoader:
    """Loader for content that the caller passes in directly."""

    def load_data(self, content):
        """Wrap ``content`` in a single-document list.

        Args:
            content: The value to load; it is stored unchanged under the
                ``"content"`` key.

        Returns:
            A one-element list containing a dict with the given ``content``
            and a ``"meta_data"`` dict whose ``"url"`` is the literal
            string ``"local"``.
        """
        document = {
            "content": content,
            "meta_data": {"url": "local"},
        }
        return [document]