[Bug fix] Fix issue with gmail loader (#1228)

This commit is contained in:
Deshraj Yadav
2024-01-29 18:36:02 +05:30
committed by GitHub
parent 31bb0e7f0f
commit 2985b667b0
4 changed files with 28 additions and 9 deletions

View File

@@ -129,3 +129,18 @@ app.chat("What is the net worth of Bill Gates?", session_id="user2")
app.chat("What was my last question", session_id="user1")
# 'Your last question was "What is the net worth of Elon Musk?"'
```
### With custom context window
If you want to customize the context window used during chat (the default context window is 3 document chunks), you can do so using the following code snippet:
```python with custom chunks size
from embedchain import App
from embedchain.config import BaseLlmConfig
app = App()
app.add("https://www.forbes.com/profile/elon-musk")
query_config = BaseLlmConfig(number_documents=5)
app.chat("What is the net worth of Elon Musk?", config=query_config)
```

View File

@@ -7,11 +7,12 @@ When we say "custom", we mean that you can customize the loader and chunker to y
```python
from embedchain import App
import your_loader
import your_chunker
from my_module import CustomLoader
from my_module import CustomChunker
app = App()
loader = your_loader()
chunker = your_chunker()
loader = CustomLoader()
chunker = CustomChunker()
app.add("source", data_type="custom", loader=loader, chunker=chunker)
```

View File

@@ -39,11 +39,14 @@ class BaseChunker(JSONSerializable):
for data in data_records:
content = data["content"]
meta_data = data["meta_data"]
metadata = data["meta_data"]
# add data type to meta data to allow query using data type
meta_data["data_type"] = self.data_type.value
meta_data["doc_id"] = doc_id
url = meta_data["url"]
metadata["data_type"] = self.data_type.value
metadata["doc_id"] = doc_id
# TODO: Currently defaulting to the src as the url. This is done intentionally since some
# data types, like the 'gmail' loader, don't have the url in the meta data.
url = metadata.get("url", src)
chunks = self.get_chunks(content)
for chunk in chunks:
@@ -53,7 +56,7 @@ class BaseChunker(JSONSerializable):
id_map[chunk_id] = True
chunk_ids.append(chunk_id)
documents.append(chunk)
metadatas.append(meta_data)
metadatas.append(metadata)
return {
"documents": documents,
"ids": chunk_ids,

View File

@@ -1,6 +1,6 @@
[tool.poetry]
name = "embedchain"
version = "0.1.71"
version = "0.1.72"
description = "Simplest open source retrieval(RAG) framework"
authors = [
"Taranjeet Singh <taranjeet@embedchain.ai>",