Introduce chunker config in YAML config (#907)

This commit is contained in:
Sidharth Mohanty
2023-11-06 23:13:15 +05:30
committed by GitHub
parent f0d112254b
commit a1de238716
5 changed files with 42 additions and 8 deletions

View File

@@ -9,7 +9,7 @@ import requests
import yaml
from embedchain import Client
from embedchain.config import PipelineConfig
from embedchain.config import PipelineConfig, ChunkerConfig
from embedchain.embedchain import CONFIG_DIR, EmbedChain
from embedchain.embedder.base import BaseEmbedder
from embedchain.embedder.openai import OpenAIEmbedder
@@ -44,6 +44,7 @@ class Pipeline(EmbedChain):
yaml_path: str = None,
log_level=logging.INFO,
auto_deploy: bool = False,
chunker: ChunkerConfig = None,
):
"""
Initialize a new `Pipeline` instance.
@@ -84,6 +85,10 @@ class Pipeline(EmbedChain):
# pipeline_id from the backend
self.id = None
self.chunker = None
if chunker:
self.chunker = ChunkerConfig(**chunker)
self.config = config or PipelineConfig()
self.name = self.config.name
@@ -366,6 +371,7 @@ class Pipeline(EmbedChain):
db_config_data = config_data.get("vectordb", {})
embedding_model_config_data = config_data.get("embedding_model", config_data.get("embedder", {}))
llm_config_data = config_data.get("llm", {})
chunker_config_data = config_data.get("chunker", {})
pipeline_config = PipelineConfig(**pipeline_config_data)
@@ -394,4 +400,5 @@ class Pipeline(EmbedChain):
embedding_model=embedding_model,
yaml_path=yaml_path,
auto_deploy=auto_deploy,
chunker=chunker_config_data,
)