Support for hybrid search in Azure AI vector store (#2408)

Co-authored-by: Deshraj Yadav <deshrajdry@gmail.com>
This commit is contained in:
Dev Khant
2025-03-20 22:57:00 +05:30
committed by GitHub
parent 8b9a8e5825
commit 8e6a08aa83
24 changed files with 275 additions and 294 deletions

View File

@@ -8,21 +8,26 @@ class AzureAISearchConfig(BaseModel):
api_key: str = Field(None, description="API key for the Azure AI Search service")
embedding_model_dims: int = Field(None, description="Dimension of the embedding vector")
compression_type: Optional[str] = Field(
None,
description="Type of vector compression to use. Options: 'scalar', 'binary', or None"
None, description="Type of vector compression to use. Options: 'scalar', 'binary', or None"
)
use_float16: bool = Field(
False,
description="Whether to store vectors in half precision (Edm.Half) instead of full precision (Edm.Single)"
False,
description="Whether to store vectors in half precision (Edm.Half) instead of full precision (Edm.Single)",
)
hybrid_search: bool = Field(
False, description="Whether to use hybrid search. If True, vector_filter_mode must be 'preFilter'"
)
vector_filter_mode: Optional[str] = Field(
"preFilter", description="Mode for vector filtering. Options: 'preFilter', 'postFilter'"
)
@model_validator(mode="before")
@classmethod
def validate_extra_fields(cls, values: Dict[str, Any]) -> Dict[str, Any]:
allowed_fields = set(cls.model_fields.keys())
input_fields = set(values.keys())
extra_fields = input_fields - allowed_fields
# Check for use_compression to provide a helpful error
if "use_compression" in extra_fields:
raise ValueError(
@@ -30,13 +35,13 @@ class AzureAISearchConfig(BaseModel):
"Please use 'compression_type=\"scalar\"' instead of 'use_compression=True' "
"or 'compression_type=None' instead of 'use_compression=False'."
)
if extra_fields:
raise ValueError(
f"Extra fields not allowed: {', '.join(extra_fields)}. "
f"Please input only the following fields: {', '.join(allowed_fields)}"
)
# Validate compression_type values
if "compression_type" in values and values["compression_type"] is not None:
valid_types = ["scalar", "binary"]
@@ -45,9 +50,9 @@ class AzureAISearchConfig(BaseModel):
f"Invalid compression_type: {values['compression_type']}. "
f"Must be one of: {', '.join(valid_types)}, or None"
)
return values
model_config = {
"arbitrary_types_allowed": True,
}
}

View File

@@ -17,8 +17,7 @@ class ElasticsearchConfig(BaseModel):
use_ssl: bool = Field(True, description="Use SSL for connection")
auto_create_index: bool = Field(True, description="Automatically create index during initialization")
custom_search_query: Optional[Callable[[List[float], int, Optional[Dict]], Dict]] = Field(
None,
description="Custom search query function. Parameters: (query, limit, filters) -> Dict"
None, description="Custom search query function. Parameters: (query, limit, filters) -> Dict"
)
@model_validator(mode="before")

View File

@@ -14,9 +14,7 @@ class GoogleMatchingEngineConfig(BaseModel):
credentials_path: Optional[str] = Field(None, description="Path to service account credentials file")
vector_search_api_endpoint: Optional[str] = Field(None, description="Vector search API endpoint")
model_config = {
"extra": "forbid"
}
model_config = {"extra": "forbid"}
def __init__(self, **kwargs):
super().__init__(**kwargs)
@@ -26,4 +24,4 @@ class GoogleMatchingEngineConfig(BaseModel):
def model_post_init(self, _context) -> None:
"""Set collection_name to index_id if not provided"""
if self.collection_name is None:
self.collection_name = self.index_id
self.collection_name = self.index_id