Skip to content

Commit

Permalink
Added pgvectorscale client (#355)
Browse files Browse the repository at this point in the history
* pgvectorscale client added

* added pgvectorscale dependencies to enable independent client installation

* Bug fix: vector type not found in the database.
  • Loading branch information
Sheharyar570 authored Aug 1, 2024
1 parent f248805 commit 5857f5c
Show file tree
Hide file tree
Showing 9 changed files with 540 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ All the database client supported
| elastic | `pip install vectordb-bench[elastic]` |
| pgvector | `pip install vectordb-bench[pgvector]` |
| pgvecto.rs | `pip install vectordb-bench[pgvecto_rs]` |
| pgvectorscale | `pip install vectordb-bench[pgvectorscale]` |
| redis | `pip install vectordb-bench[redis]` |
| memorydb | `pip install vectordb-bench[memorydb]` |
| chromadb | `pip install vectordb-bench[chromadb]` |
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ pinecone = [ "pinecone-client" ]
weaviate = [ "weaviate-client" ]
elastic = [ "elasticsearch" ]
pgvector = [ "psycopg", "psycopg-binary", "pgvector" ]
pgvectorscale = [ "psycopg", "psycopg-binary", "pgvector" ]
pgvecto_rs = [ "pgvecto_rs[psycopg3]>=0.2.1" ]
redis = [ "redis" ]
memorydb = [ "memorydb" ]
Expand Down
13 changes: 13 additions & 0 deletions vectordb_bench/backend/clients/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class DB(Enum):
WeaviateCloud = "WeaviateCloud"
PgVector = "PgVector"
PgVectoRS = "PgVectoRS"
PgVectorScale = "PgVectorScale"
Redis = "Redis"
MemoryDB = "MemoryDB"
Chroma = "Chroma"
Expand Down Expand Up @@ -71,6 +72,10 @@ def init_cls(self) -> Type[VectorDB]:
if self == DB.PgVectoRS:
from .pgvecto_rs.pgvecto_rs import PgVectoRS
return PgVectoRS

if self == DB.PgVectorScale:
from .pgvectorscale.pgvectorscale import PgVectorScale
return PgVectorScale

if self == DB.Redis:
from .redis.redis import Redis
Expand Down Expand Up @@ -123,6 +128,10 @@ def config_cls(self) -> Type[DBConfig]:
from .pgvecto_rs.config import PgVectoRSConfig
return PgVectoRSConfig

if self == DB.PgVectorScale:
from .pgvectorscale.config import PgVectorScaleConfig
return PgVectorScaleConfig

if self == DB.Redis:
from .redis.config import RedisConfig
return RedisConfig
Expand Down Expand Up @@ -172,6 +181,10 @@ def case_config_cls(self, index_type: IndexType | None = None) -> Type[DBCaseCon
from .aws_opensearch.config import AWSOpenSearchIndexConfig
return AWSOpenSearchIndexConfig

if self == DB.PgVectorScale:
from .pgvectorscale.config import _pgvectorscale_case_config
return _pgvectorscale_case_config.get(index_type)

# DB.Pinecone, DB.Chroma, DB.Redis
return EmptyDBCaseConfig

Expand Down
1 change: 1 addition & 0 deletions vectordb_bench/backend/clients/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class MetricType(str, Enum):
class IndexType(str, Enum):
HNSW = "HNSW"
DISKANN = "DISKANN"
STREAMING_DISKANN = "DISKANN"
IVFFlat = "IVF_FLAT"
IVFSQ8 = "IVF_SQ8"
Flat = "FLAT"
Expand Down
111 changes: 111 additions & 0 deletions vectordb_bench/backend/clients/pgvectorscale/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
from abc import abstractmethod
from typing import TypedDict
from pydantic import BaseModel, SecretStr
from typing_extensions import LiteralString
from ..api import DBCaseConfig, DBConfig, IndexType, MetricType

# printf-style PostgreSQL connection-URL template with four %s slots
# (presumably user, password, host and database name, in that order -- TODO confirm).
# NOTE(review): nothing in the visible part of this module references it.
POSTGRE_URL_PLACEHOLDER = "postgresql://%s:%s@%s/%s"


class PgVectorScaleConfigDict(TypedDict):
    """Typed shape of the connection parameters handed to psycopg.

    These keys are used directly as keyword arguments when opening the
    psycopg connection, so the names must match the psycopg API exactly.
    """

    user: str
    password: str
    host: str
    port: int
    dbname: str


class PgVectorScaleConfig(DBConfig):
    """Connection settings for the pgvectorscale client.

    ``to_dict`` unwraps the two secret fields and returns every setting under
    the key names declared by ``PgVectorScaleConfigDict``.
    """

    # Emitted under the "user" key by to_dict(); defaults to "postgres".
    user_name: SecretStr = SecretStr("postgres")
    # Emitted under the "password" key by to_dict(); no default is declared.
    password: SecretStr
    host: str = "localhost"
    port: int = 5432
    # Emitted under the "dbname" key by to_dict(); no default is declared.
    db_name: str

    def to_dict(self) -> PgVectorScaleConfigDict:
        """Return the connection settings as a plain dictionary.

        Returns:
            A dict with the keys ``host``, ``port``, ``dbname``, ``user`` and
            ``password``. The ``user`` and ``password`` values are the
            unwrapped plain strings, so the result must be treated as
            sensitive.
        """
        user_str = self.user_name.get_secret_value()
        pwd_str = self.password.get_secret_value()
        return {
            "host": self.host,
            "port": self.port,
            "dbname": self.db_name,
            "user": user_str,
            "password": pwd_str,
        }


class PgVectorScaleIndexConfig(BaseModel, DBCaseConfig):
    """Base case configuration shared by pgvectorscale index types.

    Provides the metric-to-string translations and declares the three
    parameter dictionaries (`index_param`, `search_param`, `session_param`)
    that concrete index configurations must supply.

    Only ``MetricType.COSINE`` is translated; every other metric (including
    ``None``) yields an empty string from each ``parse_*`` helper.
    """

    metric_type: MetricType | None = None
    create_index_before_load: bool = False
    create_index_after_load: bool = True

    def parse_metric(self) -> str:
        """Return ``"vector_cosine_ops"`` for COSINE, otherwise ``""``.

        Presumably the value is the operator-class name used when building
        the index -- TODO confirm against the client code.
        """
        if self.metric_type == MetricType.COSINE:
            return "vector_cosine_ops"
        return ""

    def parse_metric_fun_op(self) -> LiteralString:
        """Return ``"<=>"`` for COSINE, otherwise ``""``.

        Presumably the value is the SQL distance operator spliced into
        search queries -- TODO confirm against the client code.
        """
        if self.metric_type == MetricType.COSINE:
            return "<=>"
        return ""

    def parse_metric_fun_str(self) -> str:
        """Return ``"cosine_distance"`` for COSINE, otherwise ``""``."""
        if self.metric_type == MetricType.COSINE:
            return "cosine_distance"
        return ""

    @abstractmethod
    def index_param(self) -> dict:
        """Return the parameters describing how the index is built."""
        ...

    @abstractmethod
    def search_param(self) -> dict:
        """Return the parameters used when running a search."""
        ...

    @abstractmethod
    def session_param(self) -> dict:
        """Return the per-session settings for this index type."""
        ...


class PgVectorScaleStreamingDiskANNConfig(PgVectorScaleIndexConfig):
    """Case configuration for the pgvectorscale StreamingDiskANN index.

    The fields fall into two groups, matching how the methods below use them:
    index options returned by `index_param`, and the ``diskann.*`` settings
    returned by `session_param`.
    """

    index: IndexType = IndexType.STREAMING_DISKANN

    # Forwarded under "options" by index_param().
    storage_layout: str | None
    num_neighbors: int | None
    search_list_size: int | None
    max_alpha: float | None
    num_dimensions: int | None
    # NOTE(review): declared here but not forwarded by index_param();
    # confirm whether the client sets this option itself before adding it
    # to the returned options.
    num_bits_per_dimension: int | None

    # Forwarded by session_param().
    query_search_list_size: int | None
    query_rescore: int | None

    def index_param(self) -> dict:
        """Return the index description.

        Returns:
            A dict with ``metric`` (from `parse_metric`), ``index_type`` (the
            value of ``self.index``) and ``options``, a nested dict holding
            ``storage_layout``, ``num_neighbors``, ``search_list_size``,
            ``max_alpha`` and ``num_dimensions``. Unset options are passed
            through as ``None``; no filtering is done here.
        """
        return {
            "metric": self.parse_metric(),
            "index_type": self.index.value,
            "options": {
                "storage_layout": self.storage_layout,
                "num_neighbors": self.num_neighbors,
                "search_list_size": self.search_list_size,
                "max_alpha": self.max_alpha,
                "num_dimensions": self.num_dimensions,
            },
        }

    def search_param(self) -> dict:
        """Return ``metric`` and ``metric_fun_op`` for the configured metric."""
        return {
            "metric": self.parse_metric(),
            "metric_fun_op": self.parse_metric_fun_op(),
        }

    def session_param(self) -> dict:
        """Return the ``diskann.*`` settings keyed by their full names.

        Values are passed through unchanged and may be ``None``.
        """
        return {
            "diskann.query_search_list_size": self.query_search_list_size,
            "diskann.query_rescore": self.query_rescore,
        }

# Lookup table from index type to the case-config class that handles it.
# NOTE(review): the api.py hunk in this commit gives STREAMING_DISKANN the same
# value ("DISKANN") as IndexType.DISKANN, which would make it an Enum alias of
# that member -- confirm that sharing the key is intended.
_pgvectorscale_case_config = {
IndexType.STREAMING_DISKANN: PgVectorScaleStreamingDiskANNConfig,
}
Loading

0 comments on commit 5857f5c

Please sign in to comment.