[Bug fix]: fix Cannot add documents to chromadb with inconsistent sizes. (#1314)
This commit is contained in:
@@ -378,11 +378,13 @@ class EmbedChain(JSONSerializable):
|
||||
# Chunk documents into batches of 2048 and handle each batch
|
||||
# helps with large loads of embeddings that hit OpenAI limits
|
||||
document_batches = [documents[i : i + 2048] for i in range(0, len(documents), 2048)]
|
||||
for batch in document_batches:
|
||||
metadata_batches = [metadatas[i : i + 2048] for i in range(0, len(metadatas), 2048)]
|
||||
id_batches = [ids[i : i + 2048] for i in range(0, len(ids), 2048)]
|
||||
for batch_docs, batch_meta, batch_ids in zip(document_batches, metadata_batches, id_batches):
|
||||
try:
|
||||
# Add only valid batches
|
||||
if batch:
|
||||
self.db.add(documents=batch, metadatas=metadatas, ids=ids, **kwargs)
|
||||
if batch_docs:
|
||||
self.db.add(documents=batch_docs, metadatas=batch_meta, ids=batch_ids, **kwargs)
|
||||
except Exception as e:
|
||||
print(f"Failed to add batch due to a bad request: {e}")
|
||||
# Handle the error, e.g., by logging, retrying, or skipping
|
||||
|
||||
Reference in New Issue
Block a user