From db37b2ac153e8cfa3300a5c128f250f1fb3984e6 Mon Sep 17 00:00:00 2001 From: Deven Patel Date: Mon, 18 Dec 2023 17:32:26 +0530 Subject: [PATCH] [Bugfix] fix chunker config bug (#1024) Co-authored-by: Deven Patel --- docs/api-reference/advanced/configuration.mdx | 7 +++-- embedchain/utils.py | 1 + examples/chainlit/app.py | 30 +++++++++---------- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/docs/api-reference/advanced/configuration.mdx b/docs/api-reference/advanced/configuration.mdx index fa847185..2dfdc309 100644 --- a/docs/api-reference/advanced/configuration.mdx +++ b/docs/api-reference/advanced/configuration.mdx @@ -53,6 +53,7 @@ chunker: chunk_size: 2000 chunk_overlap: 100 length_function: 'len' + min_chunk_size: 0 ``` ```json config.json @@ -91,7 +92,8 @@ chunker: "chunker": { "chunk_size": 2000, "chunk_overlap": 100, - "length_function": "len" + "length_function": "len", + "min_chunk_size": 0 } } ``` @@ -138,7 +140,8 @@ config = { 'chunker': { 'chunk_size': 2000, 'chunk_overlap': 100, - 'length_function': 'len' + 'length_function': 'len', + 'min_chunk_size': 0 } } ``` diff --git a/embedchain/utils.py b/embedchain/utils.py index ddb7d2d4..3838d805 100644 --- a/embedchain/utils.py +++ b/embedchain/utils.py @@ -428,6 +428,7 @@ def validate_config(config_data): Optional("chunk_size"): int, Optional("chunk_overlap"): int, Optional("length_function"): str, + Optional("min_chunk_size"): int, }, } ) diff --git a/examples/chainlit/app.py b/examples/chainlit/app.py index 498dad66..5e8ba855 100644 --- a/examples/chainlit/app.py +++ b/examples/chainlit/app.py @@ -1,24 +1,24 @@ -import chainlit as cl -from embedchain import Pipeline as App - import os +import chainlit as cl + +from embedchain import Pipeline as App + os.environ["OPENAI_API_KEY"] = "sk-xxx" + @cl.on_chat_start async def on_chat_start(): - app = App.from_config(config={ - 'app': { - 'config': { - 'name': 'chainlit-app' - } - }, - 'llm': { - 'config': { - 'stream': True, - } + app = App.from_config( + config={ + "app": {"config": {"name": "chainlit-app"}}, + "llm": { + "config": { + "stream": True, + } + }, } - }) + ) # import your data here app.add("https://www.forbes.com/profile/elon-musk/") app.collect_metrics = False @@ -31,5 +31,5 @@ async def on_message(message: cl.Message): msg = cl.Message(content="") for chunk in await cl.make_async(app.chat)(message.content): await msg.stream_token(chunk) - + await msg.send()