Skip to content

Commit

Permalink
Add GIST1M to datasets
Browse files Browse the repository at this point in the history
Summary: GIST1M is on the FAIR cluster but was not added to datasets.py

Reviewed By: alexanderguzhva

Differential Revision: D45276664

fbshipit-source-id: 8db41d61b78983f5d01dedca1790618f80f6bc78
  • Loading branch information
mdouze authored and facebook-github-bot committed Apr 26, 2023
1 parent 1cb1e54 commit 3704bbe
Showing 1 changed file with 30 additions and 0 deletions.
30 changes: 30 additions & 0 deletions contrib/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,33 @@ def get_groundtruth(self, k=None):
gt = gt[:, :k]
return gt

class DatasetGIST1M(Dataset):
    """
    GIST1M dataset of 960-dimensional GIST descriptors.

    The original dataset is available at: http://corpus-texmex.irisa.fr/
    (ANN_GIST1M)

    Per the TEXMEX description, the archive holds 1,000,000 base vectors,
    500,000 learn (training) vectors and 1,000 query vectors. The files are
    expected under ``dataset_basedir + 'gist1M/'``.
    """

    def __init__(self):
        Dataset.__init__(self)
        # d = dimension, nt = nb of training vectors, nb = database size,
        # nq = nb of queries. These were previously copied from SIFT1M
        # (nt=100000, nq=10000), which does not match the GIST1M files.
        self.d, self.nt, self.nb, self.nq = 960, 500000, 1000000, 1000
        self.basedir = dataset_basedir + 'gist1M/'

    def get_queries(self):
        """Return the query vectors read from gist_query.fvecs."""
        return fvecs_read(self.basedir + "gist_query.fvecs")

    def get_train(self, maxtrain=None):
        """
        Return the training vectors read from gist_learn.fvecs.

        maxtrain: upper bound on the number of vectors returned; when None,
        defaults to self.nt (the first self.nt rows of the file).
        """
        maxtrain = maxtrain if maxtrain is not None else self.nt
        return fvecs_read(self.basedir + "gist_learn.fvecs")[:maxtrain]

    def get_database(self):
        """Return the database vectors read from gist_base.fvecs."""
        return fvecs_read(self.basedir + "gist_base.fvecs")

    def get_groundtruth(self, k=None):
        """
        Return the ground-truth neighbor ids read from gist_groundtruth.ivecs.

        k: when given, only the first k columns are kept. Must be <= 100
        (presumably the number of neighbors stored per query in the file).
        """
        gt = ivecs_read(self.basedir + "gist_groundtruth.ivecs")
        if k is not None:
            assert k <= 100
            gt = gt[:, :k]
        return gt


def dataset_from_name(dataset='deep1M', download=False):
Expand All @@ -321,6 +348,9 @@ def dataset_from_name(dataset='deep1M', download=False):
if dataset == 'sift1M':
return DatasetSIFT1M()

elif dataset == 'gist1M':
return DatasetGIST1M()

elif dataset.startswith('bigann'):
dbsize = 1000 if dataset == "bigann1B" else int(dataset[6:-1])
return DatasetBigANN(nb_M=dbsize)
Expand Down

0 comments on commit 3704bbe

Please sign in to comment.