From 1a4c36a266616344303a5fb5ed38a1b79a9622de Mon Sep 17 00:00:00 2001
From: Ryan Ly <rly@lbl.gov>
Date: Wed, 16 Jun 2021 13:30:50 -0700
Subject: [PATCH 1/2] Fix incorrect dtype precision upgrade for VectorIndex

---
 src/hdmf/common/table.py        | 17 +++++----
 tests/unit/common/test_table.py | 63 +++++++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+), 6 deletions(-)

diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py
index 39107e074..3199957e4 100644
--- a/src/hdmf/common/table.py
+++ b/src/hdmf/common/table.py
@@ -107,28 +107,33 @@ def add_vector(self, arg, **kwargs):
 
     def __check_precision(self, idx):
         """
-        Check precision of current dataset and, if
-        necessary, adjust precision to accommodate new value.
+        Check precision of current dataset and, if necessary, adjust precision to accommodate new value.
 
         Returns:
             unsigned integer encoding of idx
         """
         if idx > self.__maxval:
-            nbits = (np.log2(self.__maxval + 1) * 2)
+            while idx > self.__maxval:
+                nbits = (np.log2(self.__maxval + 1) * 2)  # 8->16, 16->32, 32->64
+                if nbits == 128:  # pragma: no cover
+                    msg = ('Cannot store more than 18446744073709551615 elements in a VectorData. Largest dtype '
+                           'allowed for VectorIndex is uint64.')
+                    raise ValueError(msg)
+                self.__maxval = 2 ** nbits - 1
             self.__uint = np.dtype('uint%d' % nbits).type
-            self.__maxval = 2 ** nbits - 1
             self.__adjust_precision(self.__uint)
         return self.__uint(idx)
 
     def __adjust_precision(self, uint):
         """
-        Adjust precision of data to specificied unsigned integer precision
+        Adjust precision of data to specificied unsigned integer precision.
         """
         if isinstance(self.data, list):
             for i in range(len(self.data)):
                 self.data[i] = uint(self.data[i])
         elif isinstance(self.data, np.ndarray):
-            self._VectorIndex__data = self.data.astype(uint)
+            # use self._Data__data to work around restriction on resetting self.data
+            self._Data__data = self.data.astype(uint)
         else:
             raise ValueError("cannot adjust precision of type %s to %s", (type(self.data), uint))
 
diff --git a/tests/unit/common/test_table.py b/tests/unit/common/test_table.py
index 0508d837a..24d71eb96 100644
--- a/tests/unit/common/test_table.py
+++ b/tests/unit/common/test_table.py
@@ -1838,3 +1838,66 @@ def test_dtr_references(self):
                                      'y': [read_group1, read_group2]},
                                     index=pd.Index(data=[102, 103], name='id'))
             pd.testing.assert_frame_equal(ret, expected)
+
+
+class TestVectorIndexDtype(TestCase):
+
+    def set_up_array_index(self):
+        data = VectorData(name='data', description='desc')
+        index = VectorIndex(name='index', data=np.array([]), target=data)
+        return index
+
+    def set_up_list_index(self):
+        data = VectorData(name='data', description='desc')
+        index = VectorIndex(name='index', data=[], target=data)
+        return index
+
+    def test_array_inc_precision(self):
+        index = self.set_up_array_index()
+        index.add_vector(np.empty((255, )))
+        self.assertEqual(index.data[0], 255)
+        self.assertEqual(index.data.dtype, np.uint8)
+
+    def test_array_inc_precision_1step(self):
+        index = self.set_up_array_index()
+        index.add_vector(np.empty((65535, )))
+        self.assertEqual(index.data[0], 65535)
+        self.assertEqual(index.data.dtype, np.uint16)
+
+    def test_array_inc_precision_2steps(self):
+        index = self.set_up_array_index()
+        index.add_vector(np.empty((65536, )))
+        self.assertEqual(index.data[0], 65536)
+        self.assertEqual(index.data.dtype, np.uint32)
+
+    def test_array_prev_data_inc_precision_2steps(self):
+        index = self.set_up_array_index()
+        index.add_vector(np.empty((255, )))  # dtype is still uint8
+        index.add_vector(np.empty((65536, )))
+        self.assertEqual(index.data[0], 255)  # make sure the 255 is upgraded
+        self.assertEqual(index.data.dtype, np.uint32)
+
+    def test_list_inc_precision(self):
+        index = self.set_up_list_index()
+        index.add_vector(list(range(255)))
+        self.assertEqual(index.data[0], 255)
+        self.assertEqual(type(index.data[0]), np.uint8)
+
+    def test_list_inc_precision_1step(self):
+        index = self.set_up_list_index()
+        index.add_vector(list(range(65535)))
+        self.assertEqual(index.data[0], 65535)
+        self.assertEqual(type(index.data[0]), np.uint16)
+
+    def test_list_inc_precision_2steps(self):
+        index = self.set_up_list_index()
+        index.add_vector(list(range(65536)))
+        self.assertEqual(index.data[0], 65536)
+        self.assertEqual(type(index.data[0]), np.uint32)
+
+    def test_list_prev_data_inc_precision_2steps(self):
+        index = self.set_up_list_index()
+        index.add_vector(list(range(255)))
+        index.add_vector(list(range(65536 - 255)))
+        self.assertEqual(index.data[0], 255)  # make sure the 255 is upgraded
+        self.assertEqual(type(index.data[0]), np.uint32)

From f6d21d258de72b654ab96c996a2f56818f06c0e3 Mon Sep 17 00:00:00 2001
From: Ryan Ly <rly@lbl.gov>
Date: Wed, 16 Jun 2021 13:40:15 -0700
Subject: [PATCH 2/2] Update changelog

---
 CHANGELOG.md | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 985a4edb0..940054e8e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,14 +1,19 @@
 # HDMF Changelog
 
-## HDMF 2.5.8 (Upcoming)
+## HDMF 2.5.8 (June 16, 2021)
 
 ### Minor improvements
 - Improve Sphinx documentation. @rly (#627)
 
+### Bug fix
+- Fix error with representing an indexed table column when the `VectorIndex` dtype precision is upgraded more
+  than one step, e.g., uint8 to uint32. This can happen when, for example, a single `add_row` call is used to
+  add more than 65535 elements to an empty indexed column. @rly (#631)
+
 ## HDMF 2.5.7 (June 4, 2021)
 
 ### Bug fix
-- Fix generation of extension classes that extend MultiContainerInterface and use a custom _fieldsname. @rly (#626)
+- Fix generation of extension classes that extend `MultiContainerInterface` and use a custom _fieldsname. @rly (#626)
 
 ## HDMF 2.5.6 (May 19, 2021)