From 7397279872b0141188f65676184a5bdcf1788e38 Mon Sep 17 00:00:00 2001 From: Dev Khant Date: Sat, 11 Jan 2025 23:46:42 +0530 Subject: [PATCH] HNSW support for pgvector (#2139) --- docs/components/vectordbs/dbs/pgvector.mdx | 3 ++- mem0/configs/vector_stores/pgvector.py | 1 + mem0/vector_stores/pgvector.py | 18 ++++++++++++++---- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/docs/components/vectordbs/dbs/pgvector.mdx b/docs/components/vectordbs/dbs/pgvector.mdx index 57c4bdee..d505de00 100644 --- a/docs/components/vectordbs/dbs/pgvector.mdx +++ b/docs/components/vectordbs/dbs/pgvector.mdx @@ -37,4 +37,5 @@ Here's the parameters available for configuring pgvector: | `password` | Password to connect to the database | `None` | | `host` | The host where the Postgres server is running | `None` | | `port` | The port where the Postgres server is running | `None` | -| `diskann` | Whether to use diskann for vector similarity search (requires pgvectorscale) | `True` | \ No newline at end of file +| `diskann` | Whether to use diskann for vector similarity search (requires pgvectorscale) | `True` | +| `hnsw` | Whether to use hnsw for vector similarity search | `False` | \ No newline at end of file diff --git a/mem0/configs/vector_stores/pgvector.py b/mem0/configs/vector_stores/pgvector.py index b81ed985..c9047d5f 100644 --- a/mem0/configs/vector_stores/pgvector.py +++ b/mem0/configs/vector_stores/pgvector.py @@ -12,6 +12,7 @@ class PGVectorConfig(BaseModel): host: Optional[str] = Field(None, description="Database host. Default is localhost") port: Optional[int] = Field(None, description="Database port. Default is 1536") diskann: Optional[bool] = Field(True, description="Use diskann for approximate nearest neighbors search") + hnsw: Optional[bool] = Field(False, description="Use hnsw for faster search") @model_validator(mode="before") def check_auth_and_connection(cls, values): diff --git a/mem0/vector_stores/pgvector.py b/mem0/vector_stores/pgvector.py index c8893e37..989b60df 100644 --- a/mem0/vector_stores/pgvector.py +++ b/mem0/vector_stores/pgvector.py @@ -32,6 +32,7 @@ class PGVector(VectorStoreBase): host, port, diskann, + hnsw, ): """ Initialize the PGVector database. @@ -45,9 +46,11 @@ class PGVector(VectorStoreBase): host (str, optional): Database host port (int, optional): Database port diskann (bool, optional): Use DiskANN for faster search + hnsw (bool, optional): Use HNSW for faster search """ self.collection_name = collection_name self.use_diskann = diskann + self.use_hnsw = hnsw self.conn = psycopg2.connect(dbname=dbname, user=user, password=password, host=host, port=port) self.cur = self.conn.cursor() @@ -59,11 +62,10 @@ class PGVector(VectorStoreBase): def create_col(self, embedding_model_dims): """ Create a new collection (table in PostgreSQL). - Will also initialize DiskANN index if the extension is installed. + Will also initialize vector search index if specified. Args: - name (str): Name of the collection. - embedding_model_dims (int, optional): Dimension of the embedding vector. + embedding_model_dims (int): Dimension of the embedding vector. """ self.cur.execute( f""" @@ -82,11 +84,19 @@ class PGVector(VectorStoreBase): # Create DiskANN index if extension is installed for faster search self.cur.execute( f""" - CREATE INDEX IF NOT EXISTS {self.collection_name}_vector_idx + CREATE INDEX IF NOT EXISTS {self.collection_name}_diskann_idx ON {self.collection_name} USING diskann (vector); """ ) + elif self.use_hnsw: + self.cur.execute( + f""" + CREATE INDEX IF NOT EXISTS {self.collection_name}_hnsw_idx + ON {self.collection_name} + USING hnsw (vector vector_cosine_ops) + """ + ) self.conn.commit()