Introduce chunker config in yaml config (#907)

This commit is contained in:
Sidharth Mohanty
2023-11-06 23:13:15 +05:30
committed by GitHub
parent f0d112254b
commit a1de238716
5 changed files with 42 additions and 8 deletions

View File

@@ -10,15 +10,14 @@ from dotenv import load_dotenv
from langchain.docstore.document import Document
from embedchain.chunkers.base_chunker import BaseChunker
from embedchain.config import AddConfig, BaseLlmConfig
from embedchain.config import AddConfig, BaseLlmConfig, ChunkerConfig
from embedchain.config.apps.base_app_config import BaseAppConfig
from embedchain.data_formatter import DataFormatter
from embedchain.embedder.base import BaseEmbedder
from embedchain.helper.json_serializable import JSONSerializable
from embedchain.llm.base import BaseLlm
from embedchain.loaders.base_loader import BaseLoader
from embedchain.models.data_type import (DataType, DirectDataType,
IndirectDataType, SpecialDataType)
from embedchain.models.data_type import DataType, DirectDataType, IndirectDataType, SpecialDataType
from embedchain.telemetry.posthog import AnonymousTelemetry
from embedchain.utils import detect_datatype, is_valid_json_string
from embedchain.vectordb.base import BaseVectorDB
@@ -84,6 +83,7 @@ class EmbedChain(JSONSerializable):
# Attributes that aren't subclass related.
self.user_asks = []
self.chunker: ChunkerConfig = None
# Send anonymous telemetry
self._telemetry_props = {"class": self.__class__.__name__}
self.telemetry = AnonymousTelemetry(enabled=self.config.collect_metrics)
@@ -157,7 +157,11 @@ class EmbedChain(JSONSerializable):
:return: source_hash, a md5-hash of the source, in hexadecimal representation.
:rtype: str
"""
if config is None:
if config is not None:
pass
elif self.chunker is not None:
config = AddConfig(chunker=self.chunker)
else:
config = AddConfig()
try: