
Commit f7c5bc4
Merge branch 'dev' into zarr_append
mavaylon1 authored Aug 21, 2024
2 parents 07d879f + 2b167ae commit f7c5bc4
Showing 14 changed files with 195 additions and 43 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -18,7 +18,7 @@ repos:
# hooks:
# - id: black
- repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.5.7
+    rev: v0.6.1
hooks:
- id: ruff
# - repo: https://github.com/econchick/interrogate
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -4,13 +4,18 @@

### Enhancements
- Added support to append to a dataset of references for HDMF-Zarr. @mavaylon1 [#1157](https://github.com/hdmf-dev/hdmf/pull/1157)
+ - Adjusted stacklevel of warnings to point to user code when possible. @rly [#1166](https://github.com/hdmf-dev/hdmf/pull/1166)
+ - Improved "already exists" error message when adding a container to a `MultiContainerInterface`. @rly [#1165](https://github.com/hdmf-dev/hdmf/pull/1165)
+ - Added support to write multidimensional string arrays. @stephprince [#1173](https://github.com/hdmf-dev/hdmf/pull/1173)

## HDMF 3.14.3 (July 29, 2024)

### Enhancements
- Added new attribute "dimension_labels" on `DatasetBuilder` which specifies the names of the dimensions used in the
dataset based on the shape of the dataset data and the dimension names in the spec for the data type. This attribute
is available on build (during the write process), but not on read of a dataset from a file. @rly [#1081](https://github.com/hdmf-dev/hdmf/pull/1081)
- Speed up loading namespaces by skipping register_type when already registered. @magland [#1102](https://github.com/hdmf-dev/hdmf/pull/1102)
- Speed up namespace loading: return a shallow copy rather than a deep copy in build_const_args. @magland [#1103](https://github.com/hdmf-dev/hdmf/pull/1103)

## HDMF 3.14.2 (July 7, 2024)

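One of the enhancements above, support for multidimensional string arrays (#1173), is what the `objectmapper.py` and `h5tools.py` changes below implement. As a rough sketch of the kind of payload involved, written with plain h5py rather than through HDMF (file and dataset names are made up for illustration):

```python
import numpy as np
import h5py

# a 2-D array of variable-length strings
data = np.array([["a1", "b1"], ["a2", "b2"]], dtype=object)

with h5py.File("demo.h5", "w") as f:
    f.create_dataset("words", data=data, dtype=h5py.string_dtype(encoding="utf-8"))
    print(f["words"].shape)  # (2, 2)
```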
8 changes: 4 additions & 4 deletions src/hdmf/backends/hdf5/h5_utils.py
@@ -509,7 +509,7 @@ def __init__(self, **kwargs):
# Check for possible collision with other parameters
if not isinstance(getargs('data', kwargs), Dataset) and self.__link_data:
self.__link_data = False
- warnings.warn('link_data parameter in H5DataIO will be ignored', stacklevel=2)
+ warnings.warn('link_data parameter in H5DataIO will be ignored', stacklevel=3)
# Call the super constructor and consume the data parameter
super().__init__(**kwargs)
# Construct the dict with the io args, ignoring all options that were set to None
@@ -533,7 +533,7 @@ def __init__(self, **kwargs):
self.__iosettings.pop('compression', None)
if 'compression_opts' in self.__iosettings:
warnings.warn('Compression disabled by compression=False setting. ' +
- 'compression_opts parameter will, therefore, be ignored.', stacklevel=2)
+ 'compression_opts parameter will, therefore, be ignored.', stacklevel=3)
self.__iosettings.pop('compression_opts', None)
# Validate the compression options used
self._check_compression_options()
@@ -548,7 +548,7 @@ def __init__(self, **kwargs):
if isinstance(self.data, Dataset):
for k in self.__iosettings.keys():
warnings.warn("%s in H5DataIO will be ignored with H5DataIO.data being an HDF5 dataset" % k,
- stacklevel=2)
+ stacklevel=3)

self.__dataset = None

@@ -626,7 +626,7 @@ def _check_compression_options(self):
if self.__iosettings['compression'] not in ['gzip', h5py_filters.h5z.FILTER_DEFLATE]:
warnings.warn(str(self.__iosettings['compression']) + " compression may not be available "
"on all installations of HDF5. Use of gzip is recommended to ensure portability of "
"the generated HDF5 files.", stacklevel=3)
"the generated HDF5 files.", stacklevel=4)

@staticmethod
def filter_available(filter, allow_plugin_filters):
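Every change in this file bumps the `stacklevel` passed to `warnings.warn` by one, matching the CHANGELOG entry about pointing warnings at user code (#1166); the extra frame presumably comes from a wrapper such as `docval` sitting between the caller and the `warn` call. A standalone sketch of how `stacklevel` shifts the reported location:

```python
import warnings

def inner():
    # stacklevel=1 would blame this line; stacklevel=2 would blame
    # outer(); stacklevel=3 blames outer()'s caller, i.e. user code
    warnings.warn("link_data will be ignored", stacklevel=3)

def outer():
    inner()

outer()  # with stacklevel=3, the warning is attributed to this line
```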
4 changes: 2 additions & 2 deletions src/hdmf/backends/hdf5/h5tools.py
@@ -344,7 +344,7 @@ def copy_file(self, **kwargs):
warnings.warn("The copy_file class method is no longer supported and may be removed in a future version of "
"HDMF. Please use the export method or h5py.File.copy method instead.",
category=DeprecationWarning,
- stacklevel=2)
+ stacklevel=3)

source_filename, dest_filename, expand_external, expand_refs, expand_soft = getargs('source_filename',
'dest_filename',
@@ -1469,7 +1469,7 @@ def __list_fill__(cls, parent, name, data, options=None):
data_shape = io_settings.pop('shape')
elif hasattr(data, 'shape'):
data_shape = data.shape
- elif isinstance(dtype, np.dtype):
+ elif isinstance(dtype, np.dtype) and len(dtype) > 1:  # check if compound dtype
data_shape = (len(data),)
else:
data_shape = get_data_shape(data)
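The `__list_fill__` change narrows the `np.dtype` branch to compound dtypes. For a compound dtype the data are typically rows of tuples, so generic shape inference would descend into the tuples and report a 2-D shape, while `(len(data),)` keeps the dataset 1-D. A sketch of the distinction the new guard relies on (`len()` of a scalar dtype is 0; of a compound dtype, the number of fields):

```python
import numpy as np

print(len(np.dtype('int32')))                         # 0 -- scalar dtype
compound = np.dtype([('id', 'i4'), ('score', 'f8')])
print(len(compound))                                  # 2 -- two fields

data = [(1, 0.5), (2, 0.75)]     # rows for a compound dataset
print(np.asarray(data).shape)    # (2, 2) -- naive inference goes 2-D
print((len(data),))              # (2,)   -- the shape the guard produces
```

Note that, as written, the guard only fires for compound dtypes with more than one field.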
10 changes: 8 additions & 2 deletions src/hdmf/build/objectmapper.py
@@ -601,11 +601,17 @@ def __get_data_type(cls, spec):

def __convert_string(self, value, spec):
"""Convert string types to the specified dtype."""
+        def __apply_string_type(value, string_type):
+            if isinstance(value, (list, tuple, np.ndarray, DataIO)):
+                return [__apply_string_type(item, string_type) for item in value]
+            else:
+                return string_type(value)
+
ret = value
if isinstance(spec, AttributeSpec):
if 'text' in spec.dtype:
if spec.shape is not None or spec.dims is not None:
- ret = list(map(str, value))
+ ret = __apply_string_type(value, str)
else:
ret = str(value)
elif isinstance(spec, DatasetSpec):
@@ -621,7 +627,7 @@ def string_type(x):
return x.isoformat() # method works for both date and datetime
if string_type is not None:
if spec.shape is not None or spec.dims is not None:
- ret = list(map(string_type, value))
+ ret = __apply_string_type(value, string_type)
else:
ret = string_type(value)
# copy over any I/O parameters if they were specified
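The new inner `__apply_string_type` replaces two flat `list(map(...))` calls, so string coercion now recurses through nested sequences; this is the object-mapper half of the multidimensional string array support (#1173). The same recursion as a self-contained function (the `DataIO` case from the diff is omitted to keep it dependency-free):

```python
import numpy as np

def apply_string_type(value, string_type):
    # recurse through nested sequences, applying string_type to scalars
    if isinstance(value, (list, tuple, np.ndarray)):
        return [apply_string_type(item, string_type) for item in value]
    return string_type(value)

print(apply_string_type(np.array([[1, 2], [3, 4]]), str))
# [['1', '2'], ['3', '4']] -- shape preserved, every element now str
```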
2 changes: 1 addition & 1 deletion src/hdmf/common/resources.py
@@ -628,7 +628,7 @@ def add_ref(self, **kwargs):
if entity_uri is not None:
entity_uri = entity.entity_uri
msg = 'This entity already exists. Ignoring new entity uri'
- warn(msg, stacklevel=2)
+ warn(msg, stacklevel=3)

#################
# Validate Object
10 changes: 5 additions & 5 deletions src/hdmf/common/table.py
@@ -717,7 +717,7 @@ def add_row(self, **kwargs):
warn(("Data has elements with different lengths and therefore cannot be coerced into an "
"N-dimensional array. Use the 'index' argument when creating a column to add rows "
"with different lengths."),
- stacklevel=2)
+ stacklevel=3)

def __eq__(self, other):
"""Compare if the two DynamicTables contain the same data.
@@ -776,7 +776,7 @@ def add_column(self, **kwargs): # noqa: C901

if isinstance(index, VectorIndex):
warn("Passing a VectorIndex in for index may lead to unexpected behavior. This functionality will be "
"deprecated in a future version of HDMF.", category=FutureWarning, stacklevel=2)
"deprecated in a future version of HDMF.", category=FutureWarning, stacklevel=3)

if name in self.__colids: # column has already been added
msg = "column '%s' already exists in %s '%s'" % (name, self.__class__.__name__, self.name)
@@ -793,7 +793,7 @@ def add_column(self, **kwargs): # noqa: C901
"Please ensure the new column complies with the spec. "
"This will raise an error in a future version of HDMF."
% (name, self.__class__.__name__, spec_table))
- warn(msg, stacklevel=2)
+ warn(msg, stacklevel=3)

index_bool = index or not isinstance(index, bool)
spec_index = self.__uninit_cols[name].get('index', False)
@@ -803,7 +803,7 @@ def add_column(self, **kwargs): # noqa: C901
"Please ensure the new column complies with the spec. "
"This will raise an error in a future version of HDMF."
% (name, self.__class__.__name__, spec_index))
- warn(msg, stacklevel=2)
+ warn(msg, stacklevel=3)

spec_col_cls = self.__uninit_cols[name].get('class', VectorData)
if col_cls != spec_col_cls:
@@ -841,7 +841,7 @@ def add_column(self, **kwargs): # noqa: C901
warn(("Data has elements with different lengths and therefore cannot be coerced into an "
"N-dimensional array. Use the 'index' argument when adding a column of data with "
"different lengths."),
- stacklevel=2)
+ stacklevel=3)

# Check that we are asked to create an index
if (isinstance(index, bool) or isinstance(index, int)) and index > 0 and len(data) > 0:
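Besides the `stacklevel` bumps, the warnings in this file direct users to the `index` argument for ragged data. A sketch of that pattern with the public `DynamicTable` API (table and column names are made up):

```python
from hdmf.common import DynamicTable

table = DynamicTable(name="trials", description="demo table")
# index=True stores the column as VectorData plus a VectorIndex, so rows
# may have different lengths and no N-D coercion (or warning) occurs
table.add_column(name="tags", description="per-row tags", index=True)
table.add_row(tags=["fast"])
table.add_row(tags=["slow", "error"])
print(table["tags"][1])  # ['slow', 'error']
```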
6 changes: 4 additions & 2 deletions src/hdmf/container.py
@@ -894,7 +894,7 @@ def set_dataio(self, **kwargs):
warn(
"Data.set_dataio() is deprecated. Please use Data.set_data_io() instead.",
DeprecationWarning,
- stacklevel=2,
+ stacklevel=3,
)
dataio = getargs('dataio', kwargs)
dataio.data = self.__data
@@ -1142,7 +1142,9 @@ def _func(self, **kwargs):
# still need to mark self as modified
self.set_modified()
if tmp.name in d:
msg = "'%s' already exists in %s '%s'" % (tmp.name, cls.__name__, self.name)
msg = (f"Cannot add {tmp.__class__} '{tmp.name}' at 0x{id(tmp)} to dict attribute '{attr_name}' in "
f"{cls} '{self.name}'. {d[tmp.name].__class__} '{tmp.name}' at 0x{id(d[tmp.name])} "
f"already exists in '{attr_name}' and has the same name.")
raise ValueError(msg)
d[tmp.name] = tmp
return container
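The second hunk replaces a terse "already exists" message with one naming the class and id of both the incoming container and the one already registered under that name (#1165). A hypothetical sketch of the collision it reports, assuming a minimal `MultiContainerInterface` subclass (the `__clsconf__` keys follow HDMF's documented pattern, but this exact class does not exist in the codebase):

```python
from hdmf.container import Container, MultiContainerInterface

class Holder(MultiContainerInterface):
    __clsconf__ = {
        'attr': 'things',
        'type': Container,
        'add': 'add_thing',
        'get': 'get_thing',
    }

holder = Holder(name='holder')
holder.add_thing(Container(name='x'))
holder.add_thing(Container(name='x'))  # ValueError with the new, detailed message
```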
28 changes: 16 additions & 12 deletions src/hdmf/spec/namespace.py
@@ -50,13 +50,13 @@ def __init__(self, **kwargs):
self['full_name'] = full_name
if version == str(SpecNamespace.UNVERSIONED):
# the unversioned version may be written to file as a string and read from file as a string
warn("Loaded namespace '%s' is unversioned. Please notify the extension author." % name, stacklevel=2)
warn(f"Loaded namespace '{name}' is unversioned. Please notify the extension author.")
version = SpecNamespace.UNVERSIONED
if version is None:
# version is required on write -- see YAMLSpecWriter.write_namespace -- but can be None on read in order to
# be able to read older files with extensions that are missing the version key.
warn(("Loaded namespace '%s' is missing the required key 'version'. Version will be set to '%s'. "
"Please notify the extension author.") % (name, SpecNamespace.UNVERSIONED), stacklevel=2)
warn(f"Loaded namespace '{name}' is missing the required key 'version'. Version will be set to "
f"'{SpecNamespace.UNVERSIONED}'. Please notify the extension author.")
version = SpecNamespace.UNVERSIONED
self['version'] = version
if date is not None:
@@ -466,15 +466,19 @@ def __load_namespace(self, namespace, reader, resolve=True):
return included_types

def __register_type(self, ndt, inc_ns, catalog, registered_types):
-        spec = inc_ns.get_spec(ndt)
-        spec_file = inc_ns.catalog.get_spec_source_file(ndt)
-        self.__register_dependent_types(spec, inc_ns, catalog, registered_types)
-        if isinstance(spec, DatasetSpec):
-            built_spec = self.dataset_spec_cls.build_spec(spec)
+        if ndt in registered_types:
+            # already registered
+            pass
         else:
-            built_spec = self.group_spec_cls.build_spec(spec)
-        registered_types.add(ndt)
-        catalog.register_spec(built_spec, spec_file)
+            spec = inc_ns.get_spec(ndt)
+            spec_file = inc_ns.catalog.get_spec_source_file(ndt)
+            self.__register_dependent_types(spec, inc_ns, catalog, registered_types)
+            if isinstance(spec, DatasetSpec):
+                built_spec = self.dataset_spec_cls.build_spec(spec)
+            else:
+                built_spec = self.group_spec_cls.build_spec(spec)
+            registered_types.add(ndt)
+            catalog.register_spec(built_spec, spec_file)

def __register_dependent_types(self, spec, inc_ns, catalog, registered_types):
"""Ensure that classes for all types used by this type are registered
@@ -529,7 +533,7 @@ def load_namespaces(self, **kwargs):
if ns['version'] != self.__namespaces.get(ns['name'])['version']:
# warn if the cached namespace differs from the already loaded namespace
warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
- % (ns['name'], ns['version'], self.__namespaces.get(ns['name'])['version']), stacklevel=2)
+ % (ns['name'], ns['version'], self.__namespaces.get(ns['name'])['version']))
else:
to_load.append(ns)
# now load specs into namespace
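The restructured `__register_type` is the #1102 speed-up from the CHANGELOG: the membership test now happens before the spec is fetched and its dependent types are walked, so an already-registered type costs one set lookup instead of repeated spec building. The shape of the guard, reduced to a standalone sketch:

```python
def register_type(ndt, registered_types, build_and_register):
    if ndt in registered_types:
        return  # already registered: skip the expensive work entirely
    registered_types.add(ndt)
    build_and_register(ndt)

seen, built = set(), []
register_type("Baz", seen, built.append)
register_type("Baz", seen, built.append)  # no-op on the repeat call
print(built)  # ['Baz'] -- built exactly once
```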
5 changes: 2 additions & 3 deletions src/hdmf/spec/spec.py
@@ -1,7 +1,6 @@
import re
from abc import ABCMeta
from collections import OrderedDict
- from copy import deepcopy
from warnings import warn

from ..utils import docval, getargs, popargs, get_docval
@@ -84,7 +83,7 @@ class ConstructableDict(dict, metaclass=ABCMeta):
def build_const_args(cls, spec_dict):
''' Build constructor arguments for this ConstructableDict class from a dictionary '''
# main use cases are when spec_dict is a ConstructableDict or a spec dict read from a file
- return deepcopy(spec_dict)
+ return spec_dict.copy()

@classmethod
def build_spec(cls, spec_dict):
@@ -322,7 +321,7 @@ def __init__(self, **kwargs):
default_name = getargs('default_name', kwargs)
if default_name:
if name is not None:
warn("found 'default_name' with 'name' - ignoring 'default_name'", stacklevel=2)
warn("found 'default_name' with 'name' - ignoring 'default_name'")
else:
self['default_name'] = default_name
self.__attributes = dict()
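`build_const_args` now returns `spec_dict.copy()` instead of `deepcopy(spec_dict)`, the #1103 speed-up above. A shallow copy allocates only a new outer dict and shares every nested object, which is safe as long as the constructor consumes the dict without mutating the shared values (apparently the case here). The difference in one sketch:

```python
from copy import deepcopy

spec_dict = {'name': 'foo', 'attributes': [{'name': 'attr1'}]}

shallow = spec_dict.copy()
deep = deepcopy(spec_dict)

print(shallow is spec_dict)                              # False -- new outer dict
print(shallow['attributes'] is spec_dict['attributes'])  # True  -- nested list shared
print(deep['attributes'] is spec_dict['attributes'])     # False -- everything copied
```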
7 changes: 4 additions & 3 deletions tests/unit/build_tests/test_classgenerator.py
@@ -180,10 +180,11 @@ def test_dynamic_container_creation(self):
baz_spec = GroupSpec('A test extension with no Container class',
data_type_def='Baz', data_type_inc=self.bar_spec,
attributes=[AttributeSpec('attr3', 'a float attribute', 'float'),
- AttributeSpec('attr4', 'another float attribute', 'float')])
+ AttributeSpec('attr4', 'another float attribute', 'float'),
+ AttributeSpec('attr_array', 'an array attribute', 'text', shape=(None,)),])
self.spec_catalog.register_spec(baz_spec, 'extension.yaml')
cls = self.type_map.get_dt_container_cls('Baz', CORE_NAMESPACE)
- expected_args = {'name', 'data', 'attr1', 'attr2', 'attr3', 'attr4', 'skip_post_init'}
+ expected_args = {'name', 'data', 'attr1', 'attr2', 'attr3', 'attr4', 'attr_array', 'skip_post_init'}
received_args = set()

for x in get_docval(cls.__init__):
@@ -211,7 +212,7 @@ def test_dynamic_container_creation_defaults(self):
AttributeSpec('attr4', 'another float attribute', 'float')])
self.spec_catalog.register_spec(baz_spec, 'extension.yaml')
cls = self.type_map.get_dt_container_cls('Baz', CORE_NAMESPACE)
- expected_args = {'name', 'data', 'attr1', 'attr2', 'attr3', 'attr4', 'foo', 'skip_post_init'}
+ expected_args = {'name', 'data', 'attr1', 'attr2', 'attr3', 'attr4', 'attr_array', 'foo', 'skip_post_init'}
received_args = set(map(lambda x: x['name'], get_docval(cls.__init__)))
self.assertSetEqual(expected_args, received_args)
self.assertEqual(cls.__name__, 'Baz')
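The new `attr_array` attribute exercises the multidimensional string support through the dynamic class generator. The same spec can be constructed standalone:

```python
from hdmf.spec import AttributeSpec, GroupSpec

# same attribute as in the test: text dtype, 1-D, unconstrained length
attr_array = AttributeSpec('attr_array', 'an array attribute', 'text', shape=(None,))
baz_spec = GroupSpec('A test extension with no Container class',
                     data_type_def='Baz',
                     attributes=[attr_array])
print(attr_array.shape)  # (None,)
```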
(3 more changed files not shown)
