Submit BBAnn (#70)
Signed-off-by: Patrick Weizhi Xu <[email protected]>
PwzXxm authored Mar 10, 2023
1 parent 3b64770 commit 74f70e4
Showing 4 changed files with 357 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/benchmarks.yml
@@ -24,6 +24,12 @@ jobs:
- algorithm: diskann-t2
dataset: random-range-xs
library: diskann
- algorithm: bbann
dataset: random-xs
library: bbann
- algorithm: bbann
dataset: random-range-xs
library: bbann
- algorithm: httpann_example
dataset: random-xs
library: httpann_example
94 changes: 94 additions & 0 deletions algos.yaml
@@ -66,6 +66,17 @@ random-range-xs:
args:
- [ 0.2, 0.4, 1.0 ]
query-args: []
bbann:
docker-tag: billion-scale-benchmark-bbann
module: benchmark.algorithms.bbann
constructor: BbANN
base-args: ["@metric"]
run-groups:
base:
args: |
[{"hnswM":32, "hnswefC":200, "K1":2, "blockSize":4096, "identifier": "bbann"}]
query-args: |
[{"rangeSearchProbeCount":20, "efSearch":100, "K1":2, "blockSize":4096, "radiusFactor":1.00, "identifier":"bbann"}]
random-xs:
faiss-t1:
docker-tag: billion-scale-benchmark-faissconda
@@ -115,6 +126,17 @@ random-xs:
args:
- [ 0.2, 0.8, 1.0 ]
query-args: [ ]
bbann:
docker-tag: billion-scale-benchmark-bbann
module: benchmark.algorithms.bbann
constructor: BbANN
base-args: ["@metric"]
run-groups:
base:
args: |
[{"hnswM":32, "hnswefC":200, "K1":2, "blockSize":4096, "identifier": "bbann"}]
query-args: |
[{"nProbe":5, "efSearch":100, "K1":2, "blockSize":4096, "identifier": "bbann"}]
deep-10M:
faiss-t1:
docker-tag: billion-scale-benchmark-faissconda
@@ -274,6 +296,30 @@ msspacev-1B:
{"Ls":110, "BW":4, "T":16},
{"Ls":120, "BW":4, "T":16},
{"Ls":130, "BW":4, "T":16}]
bbann:
docker-tag: billion-scale-benchmark-bbann
module: benchmark.algorithms.bbann
constructor: BbANN
base-args: ["@metric"]
run-groups:
base:
args: |
[{"hnswM":32, "hnswefC":500, "K1":128, "blockSize":4096, "sample":1, "identifier":"bbann",
"url": "https://zilliznfs.blob.core.windows.net/bbann/Upload_Candidate_SPACEV_M32_Sample1"
}]
query-args: |
[
{"nProbe": 50, "efSearch":250, "K1":128, "blockSize":4096, "identifier": "bbann"},
{"nProbe":140, "efSearch":280, "K1":128, "blockSize":4096, "identifier": "bbann"},
{"nProbe":140, "efSearch":420, "K1":128, "blockSize":4096, "identifier": "bbann"},
{"nProbe":150, "efSearch":300, "K1":128, "blockSize":4096, "identifier": "bbann"},
{"nProbe":150, "efSearch":450, "K1":128, "blockSize":4096, "identifier": "bbann"},
{"nProbe":160, "efSearch":320, "K1":128, "blockSize":4096, "identifier": "bbann"},
{"nProbe":160, "efSearch":480, "K1":128, "blockSize":4096, "identifier": "bbann"},
{"nProbe":170, "efSearch":340, "K1":128, "blockSize":4096, "identifier": "bbann"},
{"nProbe":170, "efSearch":510, "K1":128, "blockSize":4096, "identifier": "bbann"},
{"nProbe":180, "efSearch":360, "K1":128, "blockSize":4096, "identifier": "bbann"}
]
msturing-1B:
faiss-t1:
docker-tag: billion-scale-benchmark-faissconda
@@ -459,6 +505,30 @@ ssnpp-1B:
{"Lmin":8, "Lmax":45000, "BW":4, "T":16},
{"Lmin":12, "Lmax":45000, "BW":4, "T":16},
{"Lmin":16, "Lmax":45000, "BW":4, "T":16}]
bbann:
docker-tag: billion-scale-benchmark-bbann
module: benchmark.algorithms.bbann
constructor: BbANN
base-args: ["@metric"]
run-groups:
base:
args: |
[{"hnswM":32, "hnswefC":500, "K1":128, "blockSize":4096, "identifier":"bbann",
"url": "https://zilliznfs.blob.core.windows.net/bbann/Upload_Candidate_SSNPP"
}]
query-args: |
[
{"rangeSearchProbeCount":20, "efSearch":250, "K1":128, "blockSize":4096, "radiusFactor":0.95, "identifier":"bbann"},
{"rangeSearchProbeCount":20, "efSearch":230, "K1":128, "blockSize":4096, "radiusFactor":0.95, "identifier":"bbann"},
{"rangeSearchProbeCount":20, "efSearch":210, "K1":128, "blockSize":4096, "radiusFactor":0.95, "identifier":"bbann"},
{"rangeSearchProbeCount":20, "efSearch":170, "K1":128, "blockSize":4096, "radiusFactor":0.95, "identifier":"bbann"},
{"rangeSearchProbeCount":20, "efSearch":150, "K1":128, "blockSize":4096, "radiusFactor":0.95, "identifier":"bbann"},
{"rangeSearchProbeCount":15, "efSearch":230, "K1":128, "blockSize":4096, "radiusFactor":0.95, "identifier":"bbann"},
{"rangeSearchProbeCount":15, "efSearch":200, "K1":128, "blockSize":4096, "radiusFactor":0.95, "identifier":"bbann"},
{"rangeSearchProbeCount":15, "efSearch":200, "K1":128, "blockSize":4096, "radiusFactor":0.96, "identifier":"bbann"},
{"rangeSearchProbeCount":25, "efSearch":200, "K1":128, "blockSize":4096, "radiusFactor":0.93, "identifier":"bbann"},
{"rangeSearchProbeCount":25, "efSearch":200, "K1":128, "blockSize":4096, "radiusFactor":0.94, "identifier":"bbann"}
]
text2image-1B:
faiss-t1:
docker-tag: billion-scale-benchmark-faissconda
@@ -515,6 +585,30 @@ text2image-1B:
{"Ls":80, "BW":10, "T":16},
{"Ls":90, "BW":10, "T":16},
{"Ls":100, "BW":10, "T":16}]
bbann:
docker-tag: billion-scale-benchmark-bbann
module: benchmark.algorithms.bbann
constructor: BbANN
base-args: ["@metric"]
run-groups:
base:
args: |
[{"hnswM":32, "hnswefC":500, "K1":200, "blockSize":8192, "vector_use_sq":true, "use_hnsw_sq": false, "identifier":"bbann",
"url": "https://zilliznfs.blob.core.windows.net/bbann/BBANN_Text2Image_M32_BPP2_SQSample1"
}]
query-args: |
[
{"nProbe":60, "efSearch":100,"K1":200, "blockSize":8192, "vector_use_sq":true, "use_hnsw_sq": false, "identifier":"bbann"},
{"nProbe":60, "efSearch":120,"K1":200, "blockSize":8192, "vector_use_sq":true, "use_hnsw_sq": false, "identifier":"bbann"},
{"nProbe":70, "efSearch":140,"K1":200, "blockSize":8192, "vector_use_sq":true, "use_hnsw_sq": false, "identifier":"bbann"},
{"nProbe":80, "efSearch":160,"K1":200, "blockSize":8192, "vector_use_sq":true, "use_hnsw_sq": false, "identifier":"bbann"},
{"nProbe":90, "efSearch":140,"K1":200, "blockSize":8192, "vector_use_sq":true, "use_hnsw_sq": false, "identifier":"bbann"},
{"nProbe":90, "efSearch":150,"K1":200, "blockSize":8192, "vector_use_sq":true, "use_hnsw_sq": false, "identifier":"bbann"},
{"nProbe":90, "efSearch":160,"K1":200, "blockSize":8192, "vector_use_sq":true, "use_hnsw_sq": false, "identifier":"bbann"},
{"nProbe":100,"efSearch":100,"K1":200, "blockSize":8192, "vector_use_sq":true, "use_hnsw_sq": false, "identifier":"bbann"},
{"nProbe":100,"efSearch":110,"K1":200, "blockSize":8192, "vector_use_sq":true, "use_hnsw_sq": false, "identifier":"bbann"},
{"nProbe":100,"efSearch":120,"K1":200, "blockSize":8192, "vector_use_sq":true, "use_hnsw_sq": false, "identifier":"bbann"}
]
ssnpp-10M:
faiss-t1:
docker-tag: billion-scale-benchmark-faissconda
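Each bbann entry above points at the wrapper added in benchmark/algorithms/bbann.py below: the harness is expected to resolve "@metric" from base-args and hand each parsed args dict to the BbANN constructor, and each query-args dict to set_query_arguments(). A minimal, illustrative sketch (not part of this commit) of that hand-off for the random-xs run group follows; it assumes bbannpy is importable and that "@metric" resolves to "euclidean" for this dataset.

# Illustrative only: one random-xs run-group entry reaching the wrapper added below.
from benchmark.algorithms.bbann import BbANN

index_params = {"hnswM": 32, "hnswefC": 200, "K1": 2,
                "blockSize": 4096, "identifier": "bbann"}
query_params = {"nProbe": 5, "efSearch": 100, "K1": 2,
                "blockSize": 4096, "identifier": "bbann"}

algo = BbANN("euclidean", index_params)   # base-args supply the metric string
algo.set_query_arguments(query_params)    # one entry from query-args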
244 changes: 244 additions & 0 deletions benchmark/algorithms/bbann.py
@@ -0,0 +1,244 @@
import os
import time
import numpy as np
import psutil
from benchmark.algorithms.base import BaseANN
from benchmark.datasets import DATASETS, download_accelerated

import bbannpy


class BbANN(BaseANN):
def __init__(self, metric, index_params):
self.name = "BBANN"
self.metric = metric
self.index_params = index_params
self.identifier = index_params.get("identifier")
self.para = bbannpy.BBAnnParameters()
print("init BbAnn with the following parameters")
for key in index_params:
print(key, index_params[key])
if hasattr(self.para, key):
setattr(self.para, key, index_params[key])

def set_query_arguments(self, query_args):
print("query BbAnn with the following parameters")
for key in query_args:
print(key, query_args[key])
if hasattr(self.para, key):
setattr(self.para, key, query_args[key])

def done(self):
pass

def track(self):
return "T2"

def index_name(self):
"""
File name for the index.
"""
return f"{self.identifier}_index"

def create_index_dir(self, dataset):
"""
Return a folder name, in which we would store the index.
"""
print("trying to create index dir:", self.index_params)
if "overrideIndexPath" in self.index_params:
print("Override index path:", self.index_params["overrideIndexPath"])
return self.index_params["overrideIndexPath"]
index_dir = os.path.join(os.getcwd(), "data", "indices")
os.makedirs(index_dir, mode=0o777, exist_ok=True)
index_dir = os.path.join(index_dir, "T2")
os.makedirs(index_dir, mode=0o777, exist_ok=True)
index_dir = os.path.join(index_dir, self.__str__())
os.makedirs(index_dir, mode=0o777, exist_ok=True)
index_dir = os.path.join(index_dir, dataset.short_name())
os.makedirs(index_dir, mode=0o777, exist_ok=True)
index_dir = os.path.join(index_dir, self.index_name())
os.makedirs(index_dir, mode=0o777, exist_ok=True)
print(index_dir)
return index_dir

def get_index_dir(self, dataset):
if "overrideIndexPath" in self.index_params:
print("Override index path:", self.index_params["overrideIndexPath"])
return self.index_params["overrideIndexPath"]
index_dir = os.path.join(os.getcwd(), "data", "indices")
index_dir = os.path.join(index_dir, "T2")
index_dir = os.path.join(index_dir, self.__str__())
index_dir = os.path.join(index_dir, dataset.short_name())
index_dir = os.path.join(index_dir, self.index_name())
return index_dir

def get_index_components(self, dataset):
index_components = [
'bucket-centroids.bin', 'hnsw-index.bin', 'cluster-combine_ids.bin'
]

K1 = self.index_params.get("K1")
for i in range(K1):
            global_ids = 'cluster-' + str(i) + '-global_ids.bin'
            raw_data = 'cluster-' + str(i) + '-raw_data.bin'
            index_components = index_components + [global_ids, raw_data]

ds = DATASETS[dataset]()
if 'vector_use_sq' in self.index_params and self.index_params['vector_use_sq']:
index_components = index_components + ['meta']

if 'use_hnsw_sq' in self.index_params and self.index_params['use_hnsw_sq']:
index_components = index_components + ['hnsw-index.bin.codes']

return index_components

def index_files_to_store(self, dataset):
"""
Specify a triplet with the local directory path of index files,
the common prefix name of index component(s) and a list of
index components that need to be uploaded to (after build)
or downloaded from (for search) cloud storage.
For local directory path under docker environment, please use
a directory under
data/indices/track(T1 or T2)/algo.__str__()/DATASETS[dataset]().short_name()
"""

return [self.create_index_dir(DATASETS[dataset]()) # local_dir
, "" # index_prefix
, self.get_index_components(dataset) # components
]

def set_index_type(self, ds_distance, ds_dtype):
if ds_distance == "euclidean":
self.para.metric = bbannpy.L2
elif ds_distance == "ip":
self.para.metric = bbannpy.INNER_PRODUCT
else:
print("Unsuported distance function.")
return False

if not hasattr(self, 'index'):
if ds_dtype == "float32":
self.index = bbannpy.FloatIndex(self.para.metric)
elif ds_dtype == "int8":
self.index = bbannpy.Int8Index(self.para.metric)
elif ds_dtype == "uint8":
self.index = bbannpy.UInt8Index(self.para.metric)
else:
print("Unsupported data type.")
return False
return True

def fit(self, dataset):
"""
Build the index for the data points given in dataset name.
Assumes that after fitting index is loaded in memory.
"""
ds = DATASETS[dataset]()
d = ds.d
index_dir = self.create_index_dir(ds)
if not self.set_index_type(ds.distance(), ds.dtype):
return False

self.para.dataFilePath = ds.get_dataset_fn()
self.para.indexPrefixPath = index_dir+"/"

start = time.time()
self.index.build(self.para)
end = time.time()
print("bbAnn index built in %.3f s" % (end - start))
print(f"Loading index from {self.para.indexPrefixPath}")
self.index.load_index(self.para.indexPrefixPath, self.para)

def load_index(self, dataset):
"""
Load the index for dataset. Returns False if index
is not available, True otherwise.
Checking the index usually involves the dataset name
        and the index build parameters passed during construction.
"""
ds = DATASETS[dataset]()
index_dir = self.get_index_dir(ds)
if not (os.path.exists(index_dir)) and 'url' not in self.index_params:
return False
if not self.set_index_type(ds.distance(), ds.dtype):
return False
self.para.indexPrefixPath = index_dir + "/"

index_components = self.get_index_components(dataset)
for component in index_components:
index_file = self.para.indexPrefixPath + component
if not (os.path.exists(index_file)):
if 'url' in self.index_params:
index_file_source = self.index_params['url'] + '/' + component
print(f"Downloading index in background. This can take a while.")
download_accelerated(index_file_source, index_file, quiet=True)
else:
return False

print(f"Loading index from {self.para.indexPrefixPath}")
self.index.load_index(self.para.indexPrefixPath, self.para)
return True

def query(self, X, k):
"""Carry out a batch query for k-NN of query set X."""
nq, dim = (np.shape(X))
self.res, self.query_dists = self.index.batch_search(
X, dim, nq, k, self.para)
# print(self.res)

def range_query(self, X, radius):
"""
Carry out a batch query for range search with
radius.
"""
nq, dim = (np.shape(X))
# print(f"Query index with para {self.para}")
self.rangeres_lim, (self.rangeres_ids, self.rangeres_dists) = self.index.range_search(
X, dim, nq, radius, self.para)

def get_results(self):
"""
Helper method to convert query results of k-NN search.
If there are nq queries, returns a (nq, k) array of integers
representing the indices of the k-NN for each query.
"""
return self.res

def get_range_results(self):
"""
Helper method to convert query results of range search.
If there are nq queries, returns a triple lims, I, D.
lims is a (nq) array, such that
I[lims[q]:lims[q + 1]] in int
        are the indices of the range results of query q, and
D[lims[q]:lims[q + 1]] in float
are the distances.
        --------------------------
        TODO: check the order of I and D in plot.py / utils.py;
        this method currently returns (lims, D, I).
        --------------------------
"""
return (self.rangeres_lim, self.rangeres_dists, self.rangeres_ids)

def get_additional(self):
"""
Allows to retrieve additional results.
"""
return {}

def __str__(self):
return self.name

def get_memory_usage(self):
"""Return the current memory usage of this algorithm instance
(in kilobytes), or None if this information is not available."""
# return in kB for backwards compatibility
return psutil.Process().memory_info().rss / 1024
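
For reference, a minimal usage sketch (not part of this commit) of the lifecycle the harness is expected to drive with this class: load or build the index, set query parameters, run a batch k-NN search, and collect results. It assumes bbannpy is importable, the random-xs dataset has been downloaded, "@metric" resolves to "euclidean", and that DATASETS["random-xs"]().get_queries() returns the query matrix.

# Illustrative usage sketch only.
from benchmark.algorithms.bbann import BbANN
from benchmark.datasets import DATASETS

params = {"hnswM": 32, "hnswefC": 200, "K1": 2,
          "blockSize": 4096, "identifier": "bbann"}
algo = BbANN("euclidean", params)

# Reuse an existing index if one is already on disk, otherwise build it.
if not algo.load_index("random-xs"):
    algo.fit("random-xs")

algo.set_query_arguments({"nProbe": 5, "efSearch": 100, "K1": 2,
                          "blockSize": 4096, "identifier": "bbann"})

queries = DATASETS["random-xs"]().get_queries()  # assumed harness API
algo.query(queries, 10)                          # batch 10-NN search
print(algo.get_results())                        # neighbor ids, one row per query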