feat: add local text (#44)

This commit extends the "add_local" function. It
adds support for taking text and indexing/embedding it.
This commit is contained in:
cachho
2023-06-25 19:43:41 +02:00
committed by GitHub
parent b9277c84c8
commit f5f5e7edd1
4 changed files with 40 additions and 1 deletions

View File

@@ -116,6 +116,15 @@ To add any web page, use the data_type as `web_page`. Eg:
app.add('web_page', 'a_valid_web_page_url')
```
### Text
To supply your own text, use the data_type as `text` and enter a string. The text is not processed, so this can be very versatile. Eg:
```python
app.add_local('text', 'Seek wealth, not money or status. Wealth is having assets that earn while you sleep. Money is how we transfer time and wealth. Status is your place in the social hierarchy.')
```
Note: This is not used in the examples because in most cases you will supply a whole paragraph or file, which would not fit there.
### QnA Pair
To supply your own QnA pair, use the data_type as `qna_pair` and enter a tuple. Eg:

View File

@@ -0,0 +1,16 @@
from embedchain.chunkers.base_chunker import BaseChunker
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Keyword arguments that TextChunker unpacks into
# RecursiveCharacterTextSplitter when it builds its splitter.
# NOTE(review): with ``length_function=len`` the sizes are presumably
# measured in characters -- confirm against the langchain documentation.
TEXT_SPLITTER_CHUNK_PARAMS = {
    "chunk_size": 300,
    "chunk_overlap": 0,
    "length_function": len,
}
class TextChunker(BaseChunker):
    """Chunker used for the ``text`` data type.

    Supplies the base chunker with a ``RecursiveCharacterTextSplitter``
    configured from ``TEXT_SPLITTER_CHUNK_PARAMS``.
    """

    def __init__(self):
        """Build the text splitter and hand it to ``BaseChunker``."""
        super().__init__(
            RecursiveCharacterTextSplitter(**TEXT_SPLITTER_CHUNK_PARAMS)
        )

View File

@@ -9,10 +9,12 @@ from embedchain.loaders.youtube_video import YoutubeVideoLoader
from embedchain.loaders.pdf_file import PdfFileLoader
from embedchain.loaders.web_page import WebPageLoader
from embedchain.loaders.local_qna_pair import LocalQnaPairLoader
from embedchain.loaders.local_text import LocalTextLoader
from embedchain.chunkers.youtube_video import YoutubeVideoChunker
from embedchain.chunkers.pdf_file import PdfFileChunker
from embedchain.chunkers.web_page import WebPageChunker
from embedchain.chunkers.qna_pair import QnaPairChunker
from embedchain.chunkers.text import TextChunker
from embedchain.vectordb.chroma_db import ChromaDB
load_dotenv()
@@ -49,7 +51,8 @@ class EmbedChain:
'youtube_video': YoutubeVideoLoader(),
'pdf_file': PdfFileLoader(),
'web_page': WebPageLoader(),
'qna_pair': LocalQnaPairLoader()
'qna_pair': LocalQnaPairLoader(),
'text': LocalTextLoader(),
}
if data_type in loaders:
return loaders[data_type]
@@ -69,6 +72,7 @@ class EmbedChain:
'pdf_file': PdfFileChunker(),
'web_page': WebPageChunker(),
'qna_pair': QnaPairChunker(),
'text': TextChunker(),
}
if data_type in chunkers:
return chunkers[data_type]

View File

@@ -0,0 +1,10 @@
class LocalTextLoader:
    """Loader for text that the caller passes in directly."""

    def load_data(self, content):
        """Wrap ``content`` in a one-element list of document dicts.

        Args:
            content: The value to store; it is returned unchanged under
                the ``"content"`` key.

        Returns:
            A list holding a single dict with the keys ``"content"`` and
            ``"meta_data"``; the metadata's ``"url"`` is always
            ``"local"``.
        """
        document = {
            "content": content,
            "meta_data": {"url": "local"},
        }
        return [document]