[Bug fix] Fix issue with gmail loader (#1228)
This commit is contained in:
@@ -129,3 +129,18 @@ app.chat("What is the net worth of Bill Gates?", session_id="user2")
|
||||
app.chat("What was my last question", session_id="user1")
|
||||
# 'Your last question was "What is the net worth of Elon Musk?"'
|
||||
```
|
||||
|
||||
### With custom context window
|
||||
|
||||
If you want to customize the context window used during chat (the default context window is 3 document chunks), you can do so using the following code snippet:
|
||||
|
||||
```python with custom chunks size
|
||||
from embedchain import App
|
||||
from embedchain.config import BaseLlmConfig
|
||||
|
||||
app = App()
|
||||
app.add("https://www.forbes.com/profile/elon-musk")
|
||||
|
||||
query_config = BaseLlmConfig(number_documents=5)
|
||||
app.chat("What is the net worth of Elon Musk?", config=query_config)
|
||||
```
|
||||
|
||||
@@ -7,11 +7,12 @@ When we say "custom", we mean that you can customize the loader and chunker to y
|
||||
```python
|
||||
from embedchain import App
|
||||
import your_loader
|
||||
import your_chunker
|
||||
from my_module import CustomLoader
|
||||
from my_module import CustomChunker
|
||||
|
||||
app = App()
|
||||
loader = your_loader()
|
||||
chunker = your_chunker()
|
||||
loader = CustomLoader()
|
||||
chunker = CustomChunker()
|
||||
|
||||
app.add("source", data_type="custom", loader=loader, chunker=chunker)
|
||||
```
|
||||
|
||||
@@ -39,11 +39,14 @@ class BaseChunker(JSONSerializable):
|
||||
for data in data_records:
|
||||
content = data["content"]
|
||||
|
||||
meta_data = data["meta_data"]
|
||||
metadata = data["meta_data"]
|
||||
# add data type to meta data to allow query using data type
|
||||
meta_data["data_type"] = self.data_type.value
|
||||
meta_data["doc_id"] = doc_id
|
||||
url = meta_data["url"]
|
||||
metadata["data_type"] = self.data_type.value
|
||||
metadata["doc_id"] = doc_id
|
||||
|
||||
# TODO: Currently defaulting to the src as the url. This is done intentionally since some
|
||||
# of the data types, like the 'gmail' loader, don't have the url in the meta data.
|
||||
url = metadata.get("url", src)
|
||||
|
||||
chunks = self.get_chunks(content)
|
||||
for chunk in chunks:
|
||||
@@ -53,7 +56,7 @@ class BaseChunker(JSONSerializable):
|
||||
id_map[chunk_id] = True
|
||||
chunk_ids.append(chunk_id)
|
||||
documents.append(chunk)
|
||||
metadatas.append(meta_data)
|
||||
metadatas.append(metadata)
|
||||
return {
|
||||
"documents": documents,
|
||||
"ids": chunk_ids,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "embedchain"
|
||||
version = "0.1.71"
|
||||
version = "0.1.72"
|
||||
description = "Simplest open source retrieval(RAG) framework"
|
||||
authors = [
|
||||
"Taranjeet Singh <taranjeet@embedchain.ai>",
|
||||
|
||||
Reference in New Issue
Block a user