HNSW support for pgvector (#2139)
This commit is contained in:
@@ -37,4 +37,5 @@ Here's the parameters available for configuring pgvector:
|
|||||||
| `password` | Password to connect to the database | `None` |
|
| `password` | Password to connect to the database | `None` |
|
||||||
| `host` | The host where the Postgres server is running | `None` |
|
| `host` | The host where the Postgres server is running | `None` |
|
||||||
| `port` | The port where the Postgres server is running | `None` |
|
| `port` | The port where the Postgres server is running | `None` |
|
||||||
| `diskann` | Whether to use diskann for vector similarity search (requires pgvectorscale) | `True` |
|
| `diskann` | Whether to use diskann for vector similarity search (requires pgvectorscale) | `True` |
|
||||||
|
| `hnsw` | Whether to use HNSW for vector similarity search (an HNSW index is only created when the DiskANN index is not) | `False` |
|
||||||
@@ -12,6 +12,7 @@ class PGVectorConfig(BaseModel):
|
|||||||
host: Optional[str] = Field(None, description="Database host. Default is localhost")
|
host: Optional[str] = Field(None, description="Database host. Default is localhost")
|
||||||
port: Optional[int] = Field(None, description="Database port. Default is 1536")
|
port: Optional[int] = Field(None, description="Database port. Default is 1536")
|
||||||
diskann: Optional[bool] = Field(True, description="Use diskann for approximate nearest neighbors search")
|
diskann: Optional[bool] = Field(True, description="Use diskann for approximate nearest neighbors search")
|
||||||
|
hnsw: Optional[bool] = Field(False, description="Use hnsw for faster search")
|
||||||
|
|
||||||
@model_validator(mode="before")
|
@model_validator(mode="before")
|
||||||
def check_auth_and_connection(cls, values):
|
def check_auth_and_connection(cls, values):
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ class PGVector(VectorStoreBase):
|
|||||||
host,
|
host,
|
||||||
port,
|
port,
|
||||||
diskann,
|
diskann,
|
||||||
|
hnsw,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Initialize the PGVector database.
|
Initialize the PGVector database.
|
||||||
@@ -45,9 +46,11 @@ class PGVector(VectorStoreBase):
|
|||||||
host (str, optional): Database host
|
host (str, optional): Database host
|
||||||
port (int, optional): Database port
|
port (int, optional): Database port
|
||||||
diskann (bool, optional): Use DiskANN for faster search
|
diskann (bool, optional): Use DiskANN for faster search
|
||||||
|
hnsw (bool, optional): Use HNSW for faster search
|
||||||
"""
|
"""
|
||||||
self.collection_name = collection_name
|
self.collection_name = collection_name
|
||||||
self.use_diskann = diskann
|
self.use_diskann = diskann
|
||||||
|
self.use_hnsw = hnsw
|
||||||
|
|
||||||
self.conn = psycopg2.connect(dbname=dbname, user=user, password=password, host=host, port=port)
|
self.conn = psycopg2.connect(dbname=dbname, user=user, password=password, host=host, port=port)
|
||||||
self.cur = self.conn.cursor()
|
self.cur = self.conn.cursor()
|
||||||
@@ -59,11 +62,10 @@ class PGVector(VectorStoreBase):
|
|||||||
def create_col(self, embedding_model_dims):
|
def create_col(self, embedding_model_dims):
|
||||||
"""
|
"""
|
||||||
Create a new collection (table in PostgreSQL).
|
Create a new collection (table in PostgreSQL).
|
||||||
Will also initialize DiskANN index if the extension is installed.
|
Will also initialize a vector search index if one is configured.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
name (str): Name of the collection.
|
embedding_model_dims (int): Dimension of the embedding vector.
|
||||||
embedding_model_dims (int, optional): Dimension of the embedding vector.
|
|
||||||
"""
|
"""
|
||||||
self.cur.execute(
|
self.cur.execute(
|
||||||
f"""
|
f"""
|
||||||
@@ -82,11 +84,19 @@ class PGVector(VectorStoreBase):
|
|||||||
# Create DiskANN index if extension is installed for faster search
|
# Create DiskANN index if extension is installed for faster search
|
||||||
self.cur.execute(
|
self.cur.execute(
|
||||||
f"""
|
f"""
|
||||||
CREATE INDEX IF NOT EXISTS {self.collection_name}_vector_idx
|
CREATE INDEX IF NOT EXISTS {self.collection_name}_diskann_idx
|
||||||
ON {self.collection_name}
|
ON {self.collection_name}
|
||||||
USING diskann (vector);
|
USING diskann (vector);
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
elif self.use_hnsw:
|
||||||
|
self.cur.execute(
|
||||||
|
f"""
|
||||||
|
CREATE INDEX IF NOT EXISTS {self.collection_name}_hnsw_idx
|
||||||
|
ON {self.collection_name}
|
||||||
|
USING hnsw (vector vector_cosine_ops)
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
self.conn.commit()
|
self.conn.commit()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user