[Bug fix] Fix issue with gmail loader (#1228)

This commit is contained in:
Deshraj Yadav
2024-01-29 18:36:02 +05:30
committed by GitHub
parent 31bb0e7f0f
commit 2985b667b0
4 changed files with 28 additions and 9 deletions

View File

@@ -129,3 +129,18 @@ app.chat("What is the net worth of Bill Gates?", session_id="user2")
app.chat("What was my last question", session_id="user1") app.chat("What was my last question", session_id="user1")
# 'Your last question was "What is the net worth of Elon Musk?"' # 'Your last question was "What is the net worth of Elon Musk?"'
``` ```
### With custom context window
If you want to customize the context window used during chat (the default context window is 3 document chunks), you can do so using the following code snippet:
```python with custom chunks size
from embedchain import App
from embedchain.config import BaseLlmConfig
app = App()
app.add("https://www.forbes.com/profile/elon-musk")
query_config = BaseLlmConfig(number_documents=5)
app.chat("What is the net worth of Elon Musk?", config=query_config)
```

View File

@@ -7,11 +7,12 @@ When we say "custom", we mean that you can customize the loader and chunker to y
```python ```python
from embedchain import App from embedchain import App
import your_loader import your_loader
import your_chunker from my_module import CustomLoader
from my_module import CustomChunker
app = App() app = App()
loader = your_loader() loader = CustomLoader()
chunker = your_chunker() chunker = CustomChunker()
app.add("source", data_type="custom", loader=loader, chunker=chunker) app.add("source", data_type="custom", loader=loader, chunker=chunker)
``` ```

View File

@@ -39,11 +39,14 @@ class BaseChunker(JSONSerializable):
for data in data_records: for data in data_records:
content = data["content"] content = data["content"]
meta_data = data["meta_data"] metadata = data["meta_data"]
# add data type to meta data to allow query using data type # add data type to meta data to allow query using data type
meta_data["data_type"] = self.data_type.value metadata["data_type"] = self.data_type.value
meta_data["doc_id"] = doc_id metadata["doc_id"] = doc_id
url = meta_data["url"]
# TODO: Currently defaulting to the src as the url. This is done intentionally since some
# data types, like the 'gmail' loader, don't have the url in the meta data.
url = metadata.get("url", src)
chunks = self.get_chunks(content) chunks = self.get_chunks(content)
for chunk in chunks: for chunk in chunks:
@@ -53,7 +56,7 @@ class BaseChunker(JSONSerializable):
id_map[chunk_id] = True id_map[chunk_id] = True
chunk_ids.append(chunk_id) chunk_ids.append(chunk_id)
documents.append(chunk) documents.append(chunk)
metadatas.append(meta_data) metadatas.append(metadata)
return { return {
"documents": documents, "documents": documents,
"ids": chunk_ids, "ids": chunk_ids,

View File

@@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "embedchain" name = "embedchain"
version = "0.1.71" version = "0.1.72"
description = "Simplest open source retrieval(RAG) framework" description = "Simplest open source retrieval(RAG) framework"
authors = [ authors = [
"Taranjeet Singh <taranjeet@embedchain.ai>", "Taranjeet Singh <taranjeet@embedchain.ai>",