[Bug fix] Fix issue with gmail loader (#1228)
This commit is contained in:
@@ -129,3 +129,18 @@ app.chat("What is the net worth of Bill Gates?", session_id="user2")
|
|||||||
app.chat("What was my last question", session_id="user1")
|
app.chat("What was my last question", session_id="user1")
|
||||||
# 'Your last question was "What is the net worth of Elon Musk?"'
|
# 'Your last question was "What is the net worth of Elon Musk?"'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### With custom context window
|
||||||
|
|
||||||
|
If you want to customize the context window used during chat (the default context window is 3 document chunks), you can do so using the following code snippet:
|
||||||
|
|
||||||
|
```python with custom chunks size
|
||||||
|
from embedchain import App
|
||||||
|
from embedchain.config import BaseLlmConfig
|
||||||
|
|
||||||
|
app = App()
|
||||||
|
app.add("https://www.forbes.com/profile/elon-musk")
|
||||||
|
|
||||||
|
query_config = BaseLlmConfig(number_documents=5)
|
||||||
|
app.chat("What is the net worth of Elon Musk?", config=query_config)
|
||||||
|
```
|
||||||
|
|||||||
@@ -7,11 +7,12 @@ When we say "custom", we mean that you can customize the loader and chunker to y
|
|||||||
```python
|
```python
|
||||||
from embedchain import App
|
from embedchain import App
|
||||||
import your_loader
|
import your_loader
|
||||||
import your_chunker
|
from my_module import CustomLoader
|
||||||
|
from my_module import CustomChunker
|
||||||
|
|
||||||
app = App()
|
app = App()
|
||||||
loader = your_loader()
|
loader = CustomLoader()
|
||||||
chunker = your_chunker()
|
chunker = CustomChunker()
|
||||||
|
|
||||||
app.add("source", data_type="custom", loader=loader, chunker=chunker)
|
app.add("source", data_type="custom", loader=loader, chunker=chunker)
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -39,11 +39,14 @@ class BaseChunker(JSONSerializable):
|
|||||||
for data in data_records:
|
for data in data_records:
|
||||||
content = data["content"]
|
content = data["content"]
|
||||||
|
|
||||||
meta_data = data["meta_data"]
|
metadata = data["meta_data"]
|
||||||
# add data type to meta data to allow query using data type
|
# add data type to meta data to allow query using data type
|
||||||
meta_data["data_type"] = self.data_type.value
|
metadata["data_type"] = self.data_type.value
|
||||||
meta_data["doc_id"] = doc_id
|
metadata["doc_id"] = doc_id
|
||||||
url = meta_data["url"]
|
|
||||||
|
# TODO: Currently defaulting to the src as the url. This is done intentionally since some
|
||||||
|
# of the data types, like the 'gmail' loader, don't have the url in the meta data.
|
||||||
|
url = metadata.get("url", src)
|
||||||
|
|
||||||
chunks = self.get_chunks(content)
|
chunks = self.get_chunks(content)
|
||||||
for chunk in chunks:
|
for chunk in chunks:
|
||||||
@@ -53,7 +56,7 @@ class BaseChunker(JSONSerializable):
|
|||||||
id_map[chunk_id] = True
|
id_map[chunk_id] = True
|
||||||
chunk_ids.append(chunk_id)
|
chunk_ids.append(chunk_id)
|
||||||
documents.append(chunk)
|
documents.append(chunk)
|
||||||
metadatas.append(meta_data)
|
metadatas.append(metadata)
|
||||||
return {
|
return {
|
||||||
"documents": documents,
|
"documents": documents,
|
||||||
"ids": chunk_ids,
|
"ids": chunk_ids,
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "embedchain"
|
name = "embedchain"
|
||||||
version = "0.1.71"
|
version = "0.1.72"
|
||||||
description = "Simplest open source retrieval(RAG) framework"
|
description = "Simplest open source retrieval(RAG) framework"
|
||||||
authors = [
|
authors = [
|
||||||
"Taranjeet Singh <taranjeet@embedchain.ai>",
|
"Taranjeet Singh <taranjeet@embedchain.ai>",
|
||||||
|
|||||||
Reference in New Issue
Block a user