From 3704bbe4a73e4607063c0dcbc2261edaa6d4fb2d Mon Sep 17 00:00:00 2001
From: Matthijs Douze
Date: Wed, 26 Apr 2023 02:07:11 -0700
Subject: [PATCH] Add GIST1M to datasets

Summary: GIST1M is on the fair cluster but was not added to the datasets.py

Reviewed By: alexanderguzhva

Differential Revision: D45276664

fbshipit-source-id: 8db41d61b78983f5d01dedca1790618f80f6bc78
---
 contrib/datasets.py | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/contrib/datasets.py b/contrib/datasets.py
index c06cd9bb26..f37a2fb6e4 100644
--- a/contrib/datasets.py
+++ b/contrib/datasets.py
@@ -311,6 +311,33 @@ def get_groundtruth(self, k=None):
             gt = gt[:, :k]
         return gt
 
+class DatasetGIST1M(Dataset):
+    """
+    The original dataset is available at: http://corpus-texmex.irisa.fr/
+    (ANN_SIFT1M)
+    """
+
+    def __init__(self):
+        Dataset.__init__(self)
+        self.d, self.nt, self.nb, self.nq = 960, 100000, 1000000, 10000
+        self.basedir = dataset_basedir + 'gist1M/'
+
+    def get_queries(self):
+        return fvecs_read(self.basedir + "gist_query.fvecs")
+
+    def get_train(self, maxtrain=None):
+        maxtrain = maxtrain if maxtrain is not None else self.nt
+        return fvecs_read(self.basedir + "gist_learn.fvecs")[:maxtrain]
+
+    def get_database(self):
+        return fvecs_read(self.basedir + "gist_base.fvecs")
+
+    def get_groundtruth(self, k=None):
+        gt = ivecs_read(self.basedir + "gist_groundtruth.ivecs")
+        if k is not None:
+            assert k <= 100
+            gt = gt[:, :k]
+        return gt
 
 
 def dataset_from_name(dataset='deep1M', download=False):
@@ -321,6 +348,9 @@ def dataset_from_name(dataset='deep1M', download=False):
     if dataset == 'sift1M':
         return DatasetSIFT1M()
 
+    elif dataset == 'gist1M':
+        return DatasetGIST1M()
+
     elif dataset.startswith('bigann'):
         dbsize = 1000 if dataset == "bigann1B" else int(dataset[6:-1])
         return DatasetBigANN(nb_M=dbsize)