From a5c86a2f5c40749dba5d0aff16224396288b6e2b Mon Sep 17 00:00:00 2001 From: Deshraj Yadav Date: Thu, 19 Oct 2023 17:46:33 -0700 Subject: [PATCH] [Bugfix]: Fix issue of context overspilling into other apps (#835) --- embedchain/chunkers/base_chunker.py | 1 + tests/chunkers/test_base_chunker.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/embedchain/chunkers/base_chunker.py b/embedchain/chunkers/base_chunker.py index 36e58064..f7b5cb09 100644 --- a/embedchain/chunkers/base_chunker.py +++ b/embedchain/chunkers/base_chunker.py @@ -44,6 +44,7 @@ class BaseChunker(JSONSerializable): for chunk in chunks: chunk_id = hashlib.sha256((chunk + url).encode()).hexdigest() + chunk_id = f"{app_id}--{chunk_id}" if app_id is not None else chunk_id if idMap.get(chunk_id) is None: idMap[chunk_id] = True chunk_ids.append(chunk_id) diff --git a/tests/chunkers/test_base_chunker.py b/tests/chunkers/test_base_chunker.py index 6f89cd03..343653ee 100644 --- a/tests/chunkers/test_base_chunker.py +++ b/tests/chunkers/test_base_chunker.py @@ -44,8 +44,8 @@ def test_create_chunks(chunker, text_splitter_mock, loader_mock, app_id, data_ty result = chunker.create_chunks(loader_mock, "test_src", app_id) expected_ids = [ - hashlib.sha256(("Chunk 1" + "URL 1").encode()).hexdigest(), - hashlib.sha256(("Chunk 2" + "URL 1").encode()).hexdigest(), + f"{app_id}--" + hashlib.sha256(("Chunk 1" + "URL 1").encode()).hexdigest(), + f"{app_id}--" + hashlib.sha256(("Chunk 2" + "URL 1").encode()).hexdigest(), ] assert result["documents"] == ["Chunk 1", "Chunk 2"]