From 1a4c36a266616344303a5fb5ed38a1b79a9622de Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Wed, 16 Jun 2021 13:30:50 -0700 Subject: [PATCH 1/2] Fix incorrect dtype precision upgrade for VectorIndex --- src/hdmf/common/table.py | 17 +++++---- tests/unit/common/test_table.py | 63 +++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 6 deletions(-) diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index 39107e074..3199957e4 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -107,28 +107,33 @@ def add_vector(self, arg, **kwargs): def __check_precision(self, idx): """ - Check precision of current dataset and, if - necessary, adjust precision to accommodate new value. + Check precision of current dataset and, if necessary, adjust precision to accommodate new value. Returns: unsigned integer encoding of idx """ if idx > self.__maxval: - nbits = (np.log2(self.__maxval + 1) * 2) + while idx > self.__maxval: + nbits = (np.log2(self.__maxval + 1) * 2) # 8->16, 16->32, 32->64 + if nbits == 128: # pragma: no cover + msg = ('Cannot store more than 18446744073709551615 elements in a VectorData. Largest dtype ' + 'allowed for VectorIndex is uint64.') + raise ValueError(msg) + self.__maxval = 2 ** nbits - 1 self.__uint = np.dtype('uint%d' % nbits).type - self.__maxval = 2 ** nbits - 1 self.__adjust_precision(self.__uint) return self.__uint(idx) def __adjust_precision(self, uint): """ - Adjust precision of data to specificied unsigned integer precision + Adjust precision of data to specificied unsigned integer precision. """ if isinstance(self.data, list): for i in range(len(self.data)): self.data[i] = uint(self.data[i]) elif isinstance(self.data, np.ndarray): - self._VectorIndex__data = self.data.astype(uint) + # use self._Data__data to work around restriction on resetting self.data + self._Data__data = self.data.astype(uint) else: raise ValueError("cannot adjust precision of type %s to %s", (type(self.data), uint)) diff --git a/tests/unit/common/test_table.py b/tests/unit/common/test_table.py index 0508d837a..24d71eb96 100644 --- a/tests/unit/common/test_table.py +++ b/tests/unit/common/test_table.py @@ -1838,3 +1838,66 @@ def test_dtr_references(self): 'y': [read_group1, read_group2]}, index=pd.Index(data=[102, 103], name='id')) pd.testing.assert_frame_equal(ret, expected) + + +class TestVectorIndexDtype(TestCase): + + def set_up_array_index(self): + data = VectorData(name='data', description='desc') + index = VectorIndex(name='index', data=np.array([]), target=data) + return index + + def set_up_list_index(self): + data = VectorData(name='data', description='desc') + index = VectorIndex(name='index', data=[], target=data) + return index + + def test_array_inc_precision(self): + index = self.set_up_array_index() + index.add_vector(np.empty((255, ))) + self.assertEqual(index.data[0], 255) + self.assertEqual(index.data.dtype, np.uint8) + + def test_array_inc_precision_1step(self): + index = self.set_up_array_index() + index.add_vector(np.empty((65535, ))) + self.assertEqual(index.data[0], 65535) + self.assertEqual(index.data.dtype, np.uint16) + + def test_array_inc_precision_2steps(self): + index = self.set_up_array_index() + index.add_vector(np.empty((65536, ))) + self.assertEqual(index.data[0], 65536) + self.assertEqual(index.data.dtype, np.uint32) + + def test_array_prev_data_inc_precision_2steps(self): + index = self.set_up_array_index() + index.add_vector(np.empty((255, ))) # dtype is still uint8 + index.add_vector(np.empty((65536, ))) + self.assertEqual(index.data[0], 255) # make sure the 255 is upgraded + self.assertEqual(index.data.dtype, np.uint32) + + def test_list_inc_precision(self): + index = self.set_up_list_index() + index.add_vector(list(range(255))) + self.assertEqual(index.data[0], 255) + self.assertEqual(type(index.data[0]), np.uint8) + + def test_list_inc_precision_1step(self): + index = self.set_up_list_index() + index.add_vector(list(range(65535))) + self.assertEqual(index.data[0], 65535) + self.assertEqual(type(index.data[0]), np.uint16) + + def test_list_inc_precision_2steps(self): + index = self.set_up_list_index() + index.add_vector(list(range(65536))) + self.assertEqual(index.data[0], 65536) + self.assertEqual(type(index.data[0]), np.uint32) + + def test_list_prev_data_inc_precision_2steps(self): + index = self.set_up_list_index() + index.add_vector(list(range(255))) + index.add_vector(list(range(65536 - 255))) + self.assertEqual(index.data[0], 255) # make sure the 255 is upgraded + self.assertEqual(type(index.data[0]), np.uint32) From f6d21d258de72b654ab96c996a2f56818f06c0e3 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Wed, 16 Jun 2021 13:40:15 -0700 Subject: [PATCH 2/2] Update changelog --- CHANGELOG.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 985a4edb0..940054e8e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,14 +1,19 @@ # HDMF Changelog -## HDMF 2.5.8 (Upcoming) +## HDMF 2.5.8 (June 16, 2021) ### Minor improvements - Improve Sphinx documentation. @rly (#627) +### Bug fix +- Fix error with representing an indexed table column when the `VectorIndex` dtype precision is upgraded more + than one step, e.g., uint8 to uint32. This can happen when, for example, a single `add_row` call is used to + add more than 65535 elements to an empty indexed column. @rly (#631) + ## HDMF 2.5.7 (June 4, 2021) ### Bug fix -- Fix generation of extension classes that extend MultiContainerInterface and use a custom _fieldsname. @rly (#626) +- Fix generation of extension classes that extend `MultiContainerInterface` and use a custom _fieldsname. @rly (#626) ## HDMF 2.5.6 (May 19, 2021)