Add batch_size in config for VectorDB (#1448)

Author: Dev Khant
Date: 2024-06-28 03:15:58 +05:30
Committed by: GitHub
Parent: edaeb78ccf
Commit: 0a78198bb5
10 changed files with 28 additions and 34 deletions
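This change drops the hard-coded BATCH_SIZE = 10 class constant from QdrantDB and reads the batch size from the vector database config instead, so callers can tune how many points are scrolled or upserted per request. A minimal usage sketch, assuming QdrantDBConfig accepts a batch_size argument after this change and that the import paths below match the package layout (both assumptions, not shown in this diff):

# Hypothetical usage sketch: pass batch_size through the vector DB config.
# Assumes `QdrantDBConfig` now accepts `batch_size` (added by this PR) and that
# the import paths below are correct for the package layout.
from embedchain.config.vectordb.qdrant import QdrantDBConfig
from embedchain.vectordb.qdrant import QdrantDB

config = QdrantDBConfig(batch_size=50)  # previously fixed at BATCH_SIZE = 10
db = QdrantDB(config=config)            # constructing QdrantDB connects to a
                                        # Qdrant instance, so one must be reachable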


@@ -21,8 +21,6 @@ class QdrantDB(BaseVectorDB):
     Qdrant as vector database
     """
 
-    BATCH_SIZE = 10
-
     def __init__(self, config: QdrantDBConfig = None):
         """
         Qdrant as vector database
@@ -116,7 +114,7 @@ class QdrantDB(BaseVectorDB):
                 collection_name=self.collection_name,
                 scroll_filter=models.Filter(must=qdrant_must_filters),
                 offset=offset,
-                limit=self.BATCH_SIZE,
+                limit=self.config.batch_size,
             )
             offset = response[1]
             for doc in response[0]:
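For context, this hunk sits inside a pagination loop: qdrant-client's scroll call returns a (points, next_offset) tuple, and the loop keeps fetching pages of batch_size points until the offset comes back as None. A standalone sketch of that pattern, assuming a local Qdrant instance and an existing collection named "docs" (both assumptions for illustration):

# Sketch of the scroll pagination pattern, with the page size taken from config.
# Assumes qdrant-client is installed and a collection named "docs" already exists.
from qdrant_client import QdrantClient

client = QdrantClient(":memory:")  # throwaway in-memory instance for illustration
batch_size = 10                    # stands in for self.config.batch_size

offset = None
while True:
    points, offset = client.scroll(
        collection_name="docs",
        limit=batch_size,  # page size now comes from config instead of BATCH_SIZE
        offset=offset,
    )
    for point in points:
        pass  # e.g. read point.payload["identifier"]
    if offset is None:  # Qdrant returns None when there are no more pages
        break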
@@ -148,13 +146,13 @@ class QdrantDB(BaseVectorDB):
             qdrant_ids.append(id)
             payloads.append({"identifier": id, "text": document, "metadata": copy.deepcopy(metadata)})
 
-        for i in tqdm(range(0, len(qdrant_ids), self.BATCH_SIZE), desc="Adding data in batches"):
+        for i in tqdm(range(0, len(qdrant_ids), self.config.batch_size), desc="Adding data in batches"):
             self.client.upsert(
                 collection_name=self.collection_name,
                 points=Batch(
-                    ids=qdrant_ids[i : i + self.BATCH_SIZE],
-                    payloads=payloads[i : i + self.BATCH_SIZE],
-                    vectors=embeddings[i : i + self.BATCH_SIZE],
+                    ids=qdrant_ids[i : i + self.config.batch_size],
+                    payloads=payloads[i : i + self.config.batch_size],
+                    vectors=embeddings[i : i + self.config.batch_size],
                 ),
                 **kwargs,
             )
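This is the write path: qdrant_ids, payloads, and embeddings are sliced into chunks of batch_size and each chunk is upserted as one Batch. A self-contained sketch of just the slicing arithmetic (iter_batches is an illustrative name, not from the source):

# Illustrative helper mirroring the chunking done in the upsert loop above.
def iter_batches(ids, payloads, vectors, batch_size):
    for i in range(0, len(ids), batch_size):
        yield (
            ids[i : i + batch_size],
            payloads[i : i + batch_size],
            vectors[i : i + batch_size],
        )

# 25 items with batch_size=10 yields chunks of 10, 10 and 5.
for id_chunk, payload_chunk, vector_chunk in iter_batches(
    list(range(25)), [{}] * 25, [[0.0]] * 25, batch_size=10
):
    print(len(id_chunk))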
@@ -251,4 +249,4 @@ class QdrantDB(BaseVectorDB):
 
     def delete(self, where: dict):
         db_filter = self._generate_query(where)
-        self.client.delete(collection_name=self.collection_name, points_selector=db_filter)
+        self.client.delete(collection_name=self.collection_name, points_selector=db_filter)