[Bug fix]: Fix the "Cannot add documents to chromadb with inconsistent sizes" error. (#1314)

This commit is contained in:
berwin joule
2024-03-14 02:01:46 +08:00
committed by GitHub
parent 117824b32c
commit ef69c91b60

View File

@@ -378,11 +378,13 @@ class EmbedChain(JSONSerializable):
# Chunk documents into batches of 2048 and handle each batch
# helps with large loads of embeddings that hit OpenAI limits
document_batches = [documents[i : i + 2048] for i in range(0, len(documents), 2048)]
for batch in document_batches:
metadata_batches = [metadatas[i : i + 2048] for i in range(0, len(metadatas), 2048)]
id_batches = [ids[i : i + 2048] for i in range(0, len(ids), 2048)]
for batch_docs, batch_meta, batch_ids in zip(document_batches, metadata_batches, id_batches):
try:
# Add only valid batches
if batch:
self.db.add(documents=batch, metadatas=metadatas, ids=ids, **kwargs)
if batch_docs:
self.db.add(documents=batch_docs, metadatas=batch_meta, ids=batch_ids, **kwargs)
except Exception as e:
print(f"Failed to add batch due to a bad request: {e}")
# Handle the error, e.g., by logging, retrying, or skipping