From 2b8b6d3ea9a832c9339ea3d874876209ddeefd36 Mon Sep 17 00:00:00 2001
From: Sidharth Mohanty
Date: Wed, 8 Nov 2023 23:55:45 +0530
Subject: [PATCH] Chunker config docs (#913)

---
 configs/chunker.yaml            |  4 ++++
 configs/full-stack.yaml         |  5 +++++
 docs/advanced/configuration.mdx | 15 ++++++++++++---
 3 files changed, 21 insertions(+), 3 deletions(-)
 create mode 100644 configs/chunker.yaml

diff --git a/configs/chunker.yaml b/configs/chunker.yaml
new file mode 100644
index 00000000..63cf3f82
--- /dev/null
+++ b/configs/chunker.yaml
@@ -0,0 +1,4 @@
+chunker:
+  chunk_size: 100
+  chunk_overlap: 20
+  length_function: 'len'
diff --git a/configs/full-stack.yaml b/configs/full-stack.yaml
index ec3c7a2e..1da28209 100644
--- a/configs/full-stack.yaml
+++ b/configs/full-stack.yaml
@@ -2,6 +2,11 @@ app:
   config:
     id: 'full-stack-app'
 
+chunker:
+  chunk_size: 100
+  chunk_overlap: 20
+  length_function: 'len'
+
 llm:
   provider: openai
   config:
diff --git a/docs/advanced/configuration.mdx b/docs/advanced/configuration.mdx
index 4ab7f38a..44b4a9ec 100644
--- a/docs/advanced/configuration.mdx
+++ b/docs/advanced/configuration.mdx
@@ -11,6 +11,11 @@ app:
   config:
     id: 'full-stack-app'
 
+chunker:
+  chunk_size: 100
+  chunk_overlap: 20
+  length_function: 'len'
+
 llm:
   provider: openai
   config:
@@ -49,7 +54,11 @@ Alright, let's dive into what each key means in the yaml config above:
 1. `app` Section:
    - `config`:
      - `id` (String): The ID or name of your full-stack application.
-2. `llm` Section:
+2. `chunker` Section:
+   - `chunk_size` (Integer): The maximum size of each chunk of text that your data is split into; retrieved chunks are later sent to the language model as context.
+   - `chunk_overlap` (Integer): The amount of overlap between consecutive chunks, which helps preserve context across chunk boundaries.
+   - `length_function` (String): The function used to measure the length of each chunk. Here it is set to 'len'; you can also pass the import path of any function as a string.
+3. `llm` Section:
    - `provider` (String): The provider for the language model, which is set to 'openai'. You can find the full list of llm providers in [our docs](/components/llms).
    - `model` (String): The specific model being used, 'gpt-3.5-turbo'.
    - `config`:
@@ -59,13 +68,13 @@ Alright, let's dive into what each key means in the yaml config above:
    - `stream` (Boolean): Controls if the response is streamed back to the user (set to false).
    - `template` (String): A custom template for the prompt that the model uses to generate responses.
    - `system_prompt` (String): A system prompt for the model to follow when generating responses, in this case, it's set to the style of William Shakespeare.
-3. `vectordb` Section:
+4. `vectordb` Section:
    - `provider` (String): The provider for the vector database, set to 'chroma'. You can find the full list of vector database providers in [our docs](/components/vector-databases).
    - `config`:
      - `collection_name` (String): The initial collection name for the database, set to 'full-stack-app'.
      - `dir` (String): The directory for the database, set to 'db'.
      - `allow_reset` (Boolean): Indicates whether resetting the database is allowed, set to true.
-4. `embedder` Section:
+5. `embedder` Section:
    - `provider` (String): The provider for the embedder, set to 'openai'. You can find the full list of embedding model providers in [our docs](/components/embedding-models).
    - `config`:
      - `model` (String): The specific model used for text embedding, 'text-embedding-ada-002'.
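
For reviewers: the three `chunker` keys map onto a standard text-splitter configuration. Below is a minimal sketch, not the code path touched by this patch; it assumes that embedchain's chunkers delegate to LangChain's `RecursiveCharacterTextSplitter` (which accepts the same three parameters) and that the `'len'` string in the YAML resolves to Python's built-in `len`. The values are copied from `configs/chunker.yaml`.

```python
# Illustrative sketch of what chunk_size, chunk_overlap and length_function control,
# using LangChain's RecursiveCharacterTextSplitter with the values from configs/chunker.yaml.
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,       # maximum length of each chunk, measured by length_function
    chunk_overlap=20,     # units shared between consecutive chunks to preserve context
    length_function=len,  # assumption: 'len' in the YAML maps to Python's built-in len
)

# Split some dummy text and confirm every chunk respects the configured size.
text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. " * 10
chunks = splitter.split_text(text)
for chunk in chunks:
    print(len(chunk), chunk[:40])
```

In the application itself, a YAML file like `configs/chunker.yaml` or `configs/full-stack.yaml` is normally handed to embedchain when the app is constructed (for example via `App.from_config`); the exact loading call is outside the scope of this patch.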