feat: add local text (#44)
This commit extends the "add_local" function. It adds support for taking raw text and indexing/embedding it.
This commit is contained in:
@@ -116,6 +116,15 @@ To add any web page, use the data_type as `web_page`. Eg:
|
||||
app.add('web_page', 'a_valid_web_page_url')
|
||||
```
|
||||
|
||||
### Text
|
||||
|
||||
To supply your own text, use the data_type as `text` and enter a string. The text is not processed, which makes this option very versatile. Eg:
|
||||
|
||||
```python
|
||||
app.add_local('text', 'Seek wealth, not money or status. Wealth is having assets that earn while you sleep. Money is how we transfer time and wealth. Status is your place in the social hierarchy.')
|
||||
```
|
||||
Note: This is not used in the examples because in most cases you will supply a whole paragraph or file, which would not fit there.
|
||||
|
||||
### QnA Pair
|
||||
|
||||
To supply your own QnA pair, use the data_type as `qna_pair` and enter a tuple. Eg:
|
||||
|
||||
16
embedchain/chunkers/text.py
Normal file
16
embedchain/chunkers/text.py
Normal file
@@ -0,0 +1,16 @@
|
||||
from embedchain.chunkers.base_chunker import BaseChunker
|
||||
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
|
||||
|
||||
# Keyword arguments handed to RecursiveCharacterTextSplitter by TextChunker.
# Chunk length is measured with the built-in ``len``; chunks do not overlap.
TEXT_SPLITTER_CHUNK_PARAMS = dict(
    chunk_size=300,
    chunk_overlap=0,
    length_function=len,
)
|
||||
|
||||
|
||||
class TextChunker(BaseChunker):
    """Chunker used for the ``text`` data type."""

    def __init__(self):
        """Set up the base chunker with a recursive character text splitter.

        The splitter is configured from ``TEXT_SPLITTER_CHUNK_PARAMS``.
        """
        super().__init__(
            RecursiveCharacterTextSplitter(**TEXT_SPLITTER_CHUNK_PARAMS)
        )
|
||||
@@ -9,10 +9,12 @@ from embedchain.loaders.youtube_video import YoutubeVideoLoader
|
||||
from embedchain.loaders.pdf_file import PdfFileLoader
|
||||
from embedchain.loaders.web_page import WebPageLoader
|
||||
from embedchain.loaders.local_qna_pair import LocalQnaPairLoader
|
||||
from embedchain.loaders.local_text import LocalTextLoader
|
||||
from embedchain.chunkers.youtube_video import YoutubeVideoChunker
|
||||
from embedchain.chunkers.pdf_file import PdfFileChunker
|
||||
from embedchain.chunkers.web_page import WebPageChunker
|
||||
from embedchain.chunkers.qna_pair import QnaPairChunker
|
||||
from embedchain.chunkers.text import TextChunker
|
||||
from embedchain.vectordb.chroma_db import ChromaDB
|
||||
|
||||
load_dotenv()
|
||||
@@ -49,7 +51,8 @@ class EmbedChain:
|
||||
'youtube_video': YoutubeVideoLoader(),
|
||||
'pdf_file': PdfFileLoader(),
|
||||
'web_page': WebPageLoader(),
|
||||
'qna_pair': LocalQnaPairLoader()
|
||||
'qna_pair': LocalQnaPairLoader(),
|
||||
'text': LocalTextLoader(),
|
||||
}
|
||||
if data_type in loaders:
|
||||
return loaders[data_type]
|
||||
@@ -69,6 +72,7 @@ class EmbedChain:
|
||||
'pdf_file': PdfFileChunker(),
|
||||
'web_page': WebPageChunker(),
|
||||
'qna_pair': QnaPairChunker(),
|
||||
'text': TextChunker(),
|
||||
}
|
||||
if data_type in chunkers:
|
||||
return chunkers[data_type]
|
||||
|
||||
10
embedchain/loaders/local_text.py
Normal file
10
embedchain/loaders/local_text.py
Normal file
@@ -0,0 +1,10 @@
|
||||
class LocalTextLoader:
    """Loader that wraps caller-supplied text as a single record."""

    def load_data(self, content):
        """Return ``content`` wrapped in a one-element list of records.

        The content is passed through unchanged. The record's ``meta_data``
        marks the source ``url`` as ``"local"``.
        """
        record = {
            "content": content,
            "meta_data": {"url": "local"},
        }
        return [record]
|
||||
Reference in New Issue
Block a user