From a875a7a19cac54c52ada9dadc4fa154e0755c477 Mon Sep 17 00:00:00 2001 From: Tim Heap Date: Tue, 9 Jul 2024 13:38:18 +1000 Subject: [PATCH 1/5] Consistently use 'indexes' over 'indices' Using both is confusing in public interfaces. 'indexes' was used in public interfaces, while 'indices' was only used internally or as the name of positional arguments. I think we can get away with renaming to 'indexes' without a deprecation cycle this way! --- docs/api/conventions/ugrid.rst | 2 +- docs/developing/conventions.rst | 4 +- docs/developing/grass.py | 4 +- src/emsarray/cli/commands/extract_points.py | 2 +- src/emsarray/compat/shapely.py | 12 +- src/emsarray/conventions/_base.py | 36 ++--- src/emsarray/conventions/arakawa_c.py | 8 +- src/emsarray/conventions/grid.py | 8 +- src/emsarray/conventions/shoc.py | 4 +- src/emsarray/conventions/ugrid.py | 132 +++++++++--------- src/emsarray/masking.py | 6 +- src/emsarray/operations/depth.py | 20 +-- src/emsarray/operations/point_extraction.py | 18 ++- src/emsarray/operations/triangulate.py | 20 +-- src/emsarray/transect.py | 20 +-- src/emsarray/utils.py | 6 +- tests/conventions/test_base.py | 4 +- tests/conventions/test_ugrid.py | 86 ++++++------ tests/datasets/make_examples.py | 6 +- tests/masking/test_mask_dataset.py | 2 +- .../test_extract_dataframe.py | 2 +- .../point_extraction/test_extract_points.py | 2 +- .../triangulate/test_triangulate_dataset.py | 32 ++--- 23 files changed, 223 insertions(+), 213 deletions(-) diff --git a/docs/api/conventions/ugrid.rst b/docs/api/conventions/ugrid.rst index ae7f0ce..bd487ee 100644 --- a/docs/api/conventions/ugrid.rst +++ b/docs/api/conventions/ugrid.rst @@ -36,5 +36,5 @@ Topology Masking ======= -.. autofunction:: mask_from_face_indices +.. autofunction:: mask_from_face_indexes .. autofunction:: buffer_faces diff --git a/docs/developing/conventions.rst b/docs/developing/conventions.rst index fcbfa7e..e310872 100644 --- a/docs/developing/conventions.rst +++ b/docs/developing/conventions.rst @@ -69,9 +69,9 @@ and returns a value indicating whether this convention implementation can unders :pyobject: Grass.check_dataset :meth:`.DimensionConvention.unpack_index` and :meth:`.DimensionConvention.pack_index` -transform between native index types and a grid kind and indices. +transform between native index types and a grid kind and indexes. The native representation must be representable as JSON for GeoJSON export support. -The simplest representation is a tuple of (grid_kind, indices): +The simplest representation is a tuple of (grid_kind, indexes): .. 
literalinclude:: ./grass.py :pyobject: Grass.unpack_index diff --git a/docs/developing/grass.py b/docs/developing/grass.py index 7470cf1..c69a712 100644 --- a/docs/developing/grass.py +++ b/docs/developing/grass.py @@ -41,8 +41,8 @@ def check_dataset(cls, dataset: xarray.Dataset) -> int | None: def unpack_index(self, index: GrassIndex) -> tuple[GrassGridKind, Sequence[int]]: return index[0], list(index[1]) - def pack_index(self, grid_kind: GrassGridKind, indices: Sequence[int]) -> GrassIndex: - return (grid_kind, list(indices)) + def pack_index(self, grid_kind: GrassGridKind, indexes: Sequence[int]) -> GrassIndex: + return (grid_kind, list(indexes)) @cached_property def grid_dimensions(self) -> dict[GrassGridKind, Sequence[Hashable]]: diff --git a/src/emsarray/cli/commands/extract_points.py b/src/emsarray/cli/commands/extract_points.py index 58fbc78..90224f9 100644 --- a/src/emsarray/cli/commands/extract_points.py +++ b/src/emsarray/cli/commands/extract_points.py @@ -73,7 +73,7 @@ def handle(self, options: argparse.Namespace) -> None: point_dimension=options.point_dimension, missing_points=options.missing_points) except point_extraction.NonIntersectingPoints as err: - rows = dataframe.iloc[err.indices] + rows = dataframe.iloc[err.indexes] raise CommandException( f"Error extracting points: the points in the following rows " f"did not intersect the dataset geometry:\n" diff --git a/src/emsarray/compat/shapely.py b/src/emsarray/compat/shapely.py index d5cf429..0015787 100644 --- a/src/emsarray/compat/shapely.py +++ b/src/emsarray/compat/shapely.py @@ -49,17 +49,17 @@ def query( geom: BaseGeometry, ) -> numpy.ndarray: if shapely_version >= v2: - indices = self.index.query(geom) + indexes = self.index.query(geom) else: - indices = self.index._query(geom) - return cast(numpy.ndarray, self.items.take(indices)) + indexes = self.index._query(geom) + return cast(numpy.ndarray, self.items.take(indexes)) def nearest( self, geom: BaseGeometry, ) -> numpy.ndarray: if shapely_version >= v2: - indices = self.index.nearest(geom) + indexes = self.index.nearest(geom) else: - indices = self.index._nearest(geom) - return cast(numpy.ndarray, self.items.take(indices)) + indexes = self.index._nearest(geom) + return cast(numpy.ndarray, self.items.take(indexes)) diff --git a/src/emsarray/conventions/_base.py b/src/emsarray/conventions/_base.py index d43d5eb..4820dc5 100644 --- a/src/emsarray/conventions/_base.py +++ b/src/emsarray/conventions/_base.py @@ -100,7 +100,7 @@ def __lt__(self, other: Any) -> bool: return NotImplemented # SpatialIndexItems are only for cells / polygons, so we only need to - # compare the linear indices. The polygon attribute is not orderable, + # compare the linear indexes. The polygon attribute is not orderable, # so comparing on that is going to be unpleasant. return self.linear_index < other.linear_index @@ -560,7 +560,7 @@ def ravel_index(self, index: Index) -> int: >>> dataset.ems.ravel_index((3, 4)) 124 - Cell polygons are indexed in the same order as the linear indices for cells. + Cell polygons are indexed in the same order as the linear indexes for cells. To find the polygon for the cell with the native index ``(3, 4)``: .. code-block:: python @@ -1272,7 +1272,7 @@ def polygons(self) -> numpy.ndarray: The order of the polygons in the list corresponds to the linear index of this dataset. - Not all valid cell indices have a polygon, + Not all valid cell indexes have a polygon, these holes are represented as :data:`None` in the list. 
If you want a list of just polygons, apply the :attr:`mask`: @@ -1357,7 +1357,7 @@ def strtree(self) -> STRtree: This allows for fast spatial lookups, querying which cells lie at a point, or which cells intersect a geometry. - Querying the STRtree will return the linear indices of any matching cells. + Querying the STRtree will return the linear indexes of any matching cells. Use :attr:`polygons` to find the geometries associated with each index. Use :meth:`wind_index()` to transform this back to a native index, or :meth:`ravel` to linearise a variable. @@ -1365,7 +1365,7 @@ Examples -------- - Find the indices of all cells that intersect a line: + Find the indexes of all cells that intersect a line: .. code-block:: python @@ -1841,7 +1841,7 @@ def get_grid_kind(self, data_array: xarray.DataArray) -> GridKind: @abc.abstractmethod def unpack_index(self, index: Index) -> tuple[GridKind, Sequence[int]]: - """Convert a native index in to a grid kind and dimension indices. + """Convert a native index into a grid kind and dimension indexes. Parameters ---------- @@ -1852,8 +1852,8 @@ def unpack_index(self, index: Index) -> tuple[GridKind, Sequence[int]]: ------- grid_kind : GridKind The grid kind - indices : sequence of int - The dimension indices + indexes : sequence of int + The dimension indexes See Also -------- @@ -1863,15 +1863,15 @@ def unpack_index(self, index: Index) -> tuple[GridKind, Sequence[int]]: pass @abc.abstractmethod - def pack_index(self, grid_kind: GridKind, indices: Sequence[int]) -> Index: - """Convert a grid kind and dimension indices in to a native index. + def pack_index(self, grid_kind: GridKind, indexes: Sequence[int]) -> Index: + """Convert a grid kind and dimension indexes into a native index. 
Parameters ---------- grid_kind : GridKind The grid kind - indices : sequence of int - The dimension indices + indexes : sequence of int + The dimension indexes Returns ------- @@ -1885,9 +1885,9 @@ def pack_index(self, grid_kind: GridKind, indices: Sequence[int]) -> Index: pass def ravel_index(self, index: Index) -> int: - grid_kind, indices = self.unpack_index(index) + grid_kind, indexes = self.unpack_index(index) shape = self.grid_shape[grid_kind] - return int(numpy.ravel_multi_index(indices, shape)) + return int(numpy.ravel_multi_index(indexes, shape)) def wind_index( self, @@ -1898,8 +1898,8 @@ def wind_index( if grid_kind is None: grid_kind = self.default_grid_kind shape = self.grid_shape[grid_kind] - indices = tuple(map(int, numpy.unravel_index(linear_index, shape))) - return self.pack_index(grid_kind, indices) + indexes = tuple(map(int, numpy.unravel_index(linear_index, shape))) + return self.pack_index(grid_kind, indexes) def ravel( self, @@ -1937,6 +1937,6 @@ def wind( linear_dimension=linear_dimension) def selector_for_index(self, index: Index) -> dict[Hashable, int]: - grid_kind, indices = self.unpack_index(index) + grid_kind, indexes = self.unpack_index(index) dimensions = self.grid_dimensions[grid_kind] - return dict(zip(dimensions, indices)) + return dict(zip(dimensions, indexes)) diff --git a/src/emsarray/conventions/arakawa_c.py b/src/emsarray/conventions/arakawa_c.py index 150ed6b..05b881f 100644 --- a/src/emsarray/conventions/arakawa_c.py +++ b/src/emsarray/conventions/arakawa_c.py @@ -257,8 +257,8 @@ def grid_dimensions(self) -> dict[ArakawaCGridKind, Sequence[Hashable]]: def unpack_index(self, index: ArakawaCIndex) -> tuple[ArakawaCGridKind, Sequence[int]]: return index[0], index[1:] - def pack_index(self, grid_kind: ArakawaCGridKind, indices: Sequence[int]) -> ArakawaCIndex: - return cast(ArakawaCIndex, (grid_kind, *indices)) + def pack_index(self, grid_kind: ArakawaCGridKind, indexes: Sequence[int]) -> ArakawaCIndex: + return cast(ArakawaCIndex, (grid_kind, *indexes)) @cached_property @utils.timed_func @@ -309,9 +309,9 @@ def make_clip_mask( logger.info("Finding intersecting cells for centre mask") # A cell is included if it intersects the clip polygon - intersecting_indices = self.strtree.query(clip_geometry, predicate='intersects') + intersecting_indexes = self.strtree.query(clip_geometry, predicate='intersects') face_mask = numpy.full(self.face.shape, fill_value=False) - face_mask.ravel()[intersecting_indices] = True + face_mask.ravel()[intersecting_indexes] = True # Expand the mask by one cell around the clipped region, as a buffer if buffer > 0: diff --git a/src/emsarray/conventions/grid.py b/src/emsarray/conventions/grid.py index d45fd23..74ba8c2 100644 --- a/src/emsarray/conventions/grid.py +++ b/src/emsarray/conventions/grid.py @@ -259,8 +259,8 @@ def grid_dimensions(self) -> dict[CFGridKind, Sequence[Hashable]]: def unpack_index(self, index: CFGridIndex) -> tuple[CFGridKind, Sequence[int]]: return CFGridKind.face, index - def pack_index(self, grid_kind: CFGridKind, indices: Sequence[int]) -> CFGridIndex: - return cast(CFGridIndex, indices) + def pack_index(self, grid_kind: CFGridKind, indexes: Sequence[int]) -> CFGridIndex: + return cast(CFGridIndex, indexes) def get_all_geometry_names(self) -> list[Hashable]: # Grid datasets contain latitude and longitude variables @@ -292,9 +292,9 @@ def make_clip_mask( ) -> xarray.Dataset: topology = self.topology - intersecting_indices = self.strtree.query(clip_geometry, predicate='intersects') + intersecting_indexes = 
self.strtree.query(clip_geometry, predicate='intersects') mask = numpy.full(topology.shape, fill_value=False) - mask.ravel()[intersecting_indices] = True + mask.ravel()[intersecting_indexes] = True if buffer > 0: mask = masking.blur_mask(mask, size=buffer) diff --git a/src/emsarray/conventions/shoc.py b/src/emsarray/conventions/shoc.py index cb20571..eee7d2d 100644 --- a/src/emsarray/conventions/shoc.py +++ b/src/emsarray/conventions/shoc.py @@ -58,7 +58,7 @@ def depth_coordinate(self) -> xarray.DataArray: @cached_property def depth_coordinates(self) -> tuple[xarray.DataArray, ...]: - names = ['z_centre', 'z_grid'] + names = ['z_centre', 'z_grid', 'z_centre_sed', 'z_grid_sed'] return tuple( self.dataset[name] for name in names if name in self.dataset.variables) @@ -125,7 +125,7 @@ def depth_coordinate(self) -> xarray.DataArray: @cached_property def depth_coordinates(self) -> tuple[xarray.DataArray, ...]: - names = ['zc'] + names = ['zc', 'zcsed'] return tuple( self.dataset[name] for name in names if name in self.dataset.variables) diff --git a/src/emsarray/conventions/ugrid.py b/src/emsarray/conventions/ugrid.py index 0d74bd4..33acf3c 100644 --- a/src/emsarray/conventions/ugrid.py +++ b/src/emsarray/conventions/ugrid.py @@ -38,71 +38,71 @@ def _split_coord(attr: str) -> tuple[str, str]: def buffer_faces( - face_indices: numpy.ndarray, + face_indexes: numpy.ndarray, topology: 'Mesh2DTopology', ) -> numpy.ndarray: """ When clipping a dataset to a region, including a buffer of extra faces - around the included faces is desired. Given an array of face indices, + around the included faces is desired. Given an array of face indexes, :func:`buffer_faces` will find all the faces required to form this buffer - and return a new array of face indices that include both the input faces + and return a new array of face indexes that include both the input faces and the buffer faces Specifically, this finds all faces that share a node with one of the included faces. """ - original_face_indices = set(face_indices.tolist()) + original_face_indexes = set(face_indexes.tolist()) face_node = topology.face_node_array # Find all nodes for the faces - included_nodes = set(numpy.unique(face_node[face_indices].compressed())) + included_nodes = set(numpy.unique(face_node[face_indexes].compressed())) # Find all faces that are composed of any of the nodes just found included_faces = ( face_index - for face_index, node_indices in enumerate(face_node) + for face_index, node_indexes in enumerate(face_node) # Either one of the original faces ... - if face_index in original_face_indices + if face_index in original_face_indexes # ... or shares a node with one of the original faces - or bool(included_nodes.intersection(node_indices.compressed())) + or bool(included_nodes.intersection(node_indexes.compressed())) ) return cast(numpy.ndarray, numpy.fromiter(included_faces, dtype=topology.sensible_dtype)) -def mask_from_face_indices( - face_indices: numpy.ndarray, +def mask_from_face_indexes( + face_indexes: numpy.ndarray, topology: 'Mesh2DTopology', ) -> xarray.Dataset: """ - Make a mask dataset from a list of face indices. + Make a mask dataset from a list of face indexes. This mask can later be applied using :meth:`~.Convention.apply_clip_mask`. A mask for a UGRID dataset indicates which nodes, edges, and faces - (collectively 'elements') to include, and their new indices. When + (collectively 'elements') to include, and their new indexes. 
When elements are dropped from the dataset, to keep the remaining data - contiguous, all elements gain new indices. + contiguous, all elements gain new indexes. The mask consists of three data arrays corresponding to nodes, edges, and faces. Each data array indicates the new index for that element, or whether that element should be dropped. """ - # Generate a new index for each element. Indices are assigned in the same + # Generate a new index for each element. Indexes are assigned in the same # order as they currently exist. This means that data rows for excluded # elements can be dropped, and the remaining data rows will automatically - # have the correct indices. + # have the correct indexes. fill_value = topology.sensible_fill_value data_vars = {} - def new_element_indices(size: int, indices: numpy.ndarray) -> numpy.ma.MaskedArray: - new_indices = numpy.full( + def new_element_indexes(size: int, indexes: numpy.ndarray) -> numpy.ma.MaskedArray: + new_indexes = numpy.full( (size,), fill_value=fill_value, dtype=topology.sensible_dtype) - new_indices = numpy.ma.masked_array(new_indices, mask=True) - new_indices[indices] = numpy.arange(len(indices)) - return new_indices + new_indexes = numpy.ma.masked_array(new_indexes, mask=True) + new_indexes[indexes] = numpy.arange(len(indexes)) + return new_indexes # Record which old face index maps to which new face index data_vars['new_face_index'] = _masked_integer_data_array( - data=new_element_indices(topology.face_count, face_indices), + data=new_element_indexes(topology.face_count, face_indexes), fill_value=fill_value, dims=['old_face_index'], ) @@ -111,22 +111,22 @@ def new_element_indices(size: int, indices: numpy.ndarray) -> numpy.ma.MaskedArr # dataset doesn't define an edge dimension if topology.has_edge_dimension: face_edge = topology.face_edge_array - edge_indices = numpy.sort(numpy.unique(face_edge[face_indices].compressed())) + edge_indexes = numpy.sort(numpy.unique(face_edge[face_indexes].compressed())) # Record which old edge index maps to which new edge index data_vars['new_edge_index'] = _masked_integer_data_array( - data=new_element_indices(topology.edge_count, edge_indices), + data=new_element_indexes(topology.edge_count, edge_indexes), fill_value=fill_value, dims=['old_edge_index'], ) # Find all nodes associated with included faces face_node = topology.face_node_array - node_indices = numpy.sort(numpy.unique(face_node[face_indices].compressed())) + node_indexes = numpy.sort(numpy.unique(face_node[face_indexes].compressed())) # Record which old node index maps to which new node index data_vars['new_node_index'] = _masked_integer_data_array( - data=new_element_indices(topology.node_count, node_indices), + data=new_element_indexes(topology.node_count, node_indexes), fill_value=fill_value, dims=['old_node_index'], ) @@ -174,7 +174,7 @@ def _get_start_index(connectivity: xarray.DataArray) -> int: def update_connectivity( connectivity: xarray.DataArray, old_array: numpy.ndarray, - row_indices: numpy.ndarray, + row_indexes: numpy.ndarray, column_values: numpy.ndarray, primary_dimension: Hashable, fill_value: int, @@ -200,7 +200,7 @@ def update_connectivity( The old connectivity array, the companion to the connectivity data array. Each row corresponds to either an edge or a face. - row_indices : numpy.ndarray + row_indexes : numpy.ndarray A one dimensional numpy masked array indicating which rows of ``old_array`` are to be included. Masked items are excluded, all other items are included. 
@@ -243,7 +243,7 @@ def update_connectivity( # By constructing the array using new_fill_value where needed, # setting the dtype explicitly, and adding the _FillValue attribute, # xarray will cooperate. - include_row = ~numpy.ma.getmask(row_indices) + include_row = ~numpy.ma.getmask(row_indexes) raw_values = numpy.array([ [ column_values[item] if item is not numpy.ma.masked else fill_value @@ -481,7 +481,7 @@ def _to_index_array( primary_dimension: Hashable, ) -> numpy.ndarray: """ - Convert a data array of node, edge, or face indices + Convert a data array of node, edge, or face indexes into a masked numpy integer array. This takes care of casting arrays to integers and fixing one-based indexing where appropriate. @@ -517,7 +517,7 @@ def _to_index_array( values = numpy.ma.masked_array(values, mask=numpy.ma.nomask) # UGRID conventions allow for zero based or one based indexing. - # To be consistent we convert all indices to zero based. + # To be consistent we convert all indexes to zero based. start_index = _get_start_index(data_array) if start_index != 0: values = values - start_index @@ -599,7 +599,7 @@ def make_edge_node_array(self) -> numpy.ndarray: # Each edge is composed of two nodes. Each edge may be named twice, # once for each face. To de-duplicate this, edges are built up using # this dict-of-sets, where the dict index is the node with the - # lower index, and the set is the node indices of the other end. + # lower index, and the set is the node indexes of the other end. low_highs: dict[int, set[int]] = defaultdict(set) for face_index, node_pairs in self._face_and_node_pair_iter(): @@ -674,8 +674,8 @@ def make_edge_face_array(self) -> numpy.ndarray: # The number of faces already seen for this edge edge_face_count = numpy.zeros(self.edge_count, dtype=self.sensible_dtype) - for face_index, edge_indices in enumerate(self.face_edge_array): - for edge_index in edge_indices.compressed(): + for face_index, edge_indexes in enumerate(self.face_edge_array): + for edge_index in edge_indexes.compressed(): edge_face[edge_index, edge_face_count[edge_index]] = face_index edge_face_count[edge_index] += 1 @@ -719,7 +719,7 @@ def face_node_array(self) -> numpy.ndarray: """ An integer numpy array with shape (:attr:`face_dimension`, :attr:`max_node_dimension`), - representing the node indices that make up each face. + representing the node indexes that make up each face. """ return self._to_index_array( self.face_node_connectivity, self.face_dimension) @@ -765,7 +765,7 @@ def face_edge_array(self) -> numpy.ndarray: """ An integer numpy array with shape (:attr:`face_dimension`, :attr:`max_node_dimension`), - representing the edge indices that border each face. + representing the edge indexes that border each face. """ if self.has_valid_face_edge_connectivity: return self._to_index_array( @@ -833,7 +833,7 @@ def face_face_array(self) -> numpy.ndarray: """ An integer numpy array with shape (:attr:`face_dimension`, :attr:`max_node_dimension`), - representing the indices of all faces that border a given face. + representing the indexes of all faces that border a given face. 
""" if self.has_valid_face_face_connectivity: return self._to_index_array( @@ -848,10 +848,10 @@ def make_face_face_array(self) -> numpy.ndarray: filled = numpy.full(shape, self.sensible_fill_value, dtype=self.sensible_dtype) face_face: numpy.ndarray = numpy.ma.masked_array(filled, mask=True) - for edge_index, face_indices in enumerate(self.edge_face_array): - if numpy.any(numpy.ma.getmask(face_indices)): + for edge_index, face_indexes in enumerate(self.edge_face_array): + if numpy.any(numpy.ma.getmask(face_indexes)): continue - left, right = face_indices + left, right = face_indexes face_face[left, face_count[left]] = right face_face[right, face_count[right]] = left face_count[left] += 1 @@ -866,10 +866,10 @@ def _face_and_node_pair_iter(self) -> Iterable[tuple[int, list[tuple[int, int]]] defining the edges of the face. """ face_node = self.face_node_array - for face_index, node_indices in enumerate(face_node): - node_indices = node_indices.compressed() - node_indices = numpy.append(node_indices, node_indices[0]) - yield face_index, list(utils.pairwise(node_indices)) + for face_index, node_indexes in enumerate(face_node): + node_indexes = node_indexes.compressed() + node_indexes = numpy.append(node_indexes, node_indexes[0]) + yield face_index, list(utils.pairwise(node_indexes)) @cached_property def dimension_for_grid_kind(self) -> dict['UGridKind', Hashable]: @@ -1013,7 +1013,7 @@ class UGridKind(str, enum.Enum): node = 'node' -#: UGRID indices are always single integers, for all index kinds. +#: UGRID indexes are always single integers, for all index kinds. UGridIndex = tuple[UGridKind, int] @@ -1065,8 +1065,8 @@ def grid_dimensions(self) -> dict[UGridKind, Sequence[Hashable]]: def unpack_index(self, index: UGridIndex) -> tuple[UGridKind, Sequence[int]]: return index[0], index[1:] - def pack_index(self, grid_kind: UGridKind, indices: Sequence[int]) -> UGridIndex: - return (grid_kind, indices[0]) + def pack_index(self, grid_kind: UGridKind, indexes: Sequence[int]) -> UGridIndex: + return (grid_kind, indexes[0]) @cached_property def grid_kinds(self) -> frozenset[UGridKind]: @@ -1128,9 +1128,9 @@ def make_clip_mask( This mask can later be applied using :meth:`apply_clip_mask`. A mask for a UGRID dataset indicates which nodes, edges, and faces - (collectively 'elements') to include, and their new indices. When + (collectively 'elements') to include, and their new indexes. When elements are dropped from the dataset, to keep the remaining data - contiguous, all elements gain new indices. + contiguous, all elements gain new indexes. The mask consists of three data arrays corresponding to nodes, edges, and faces. 
Each data array indicates the new index for that element, or @@ -1144,16 +1144,16 @@ def make_clip_mask( """ # Find all faces that intersect the clip geometry logger.info("Making clip mask") - face_indices = self.strtree.query(clip_geometry, predicate='intersects') - logger.debug("Found %d intersecting faces, adding size %d buffer...", len(face_indices), buffer) + face_indexes = self.strtree.query(clip_geometry, predicate='intersects') + logger.debug("Found %d intersecting faces, adding size %d buffer...", len(face_indexes), buffer) # Include all the neighbours of the intersecting faces for _ in range(buffer): - face_indices = buffer_faces(face_indices, self.topology) - logger.debug("Total faces in mask: %d", len(face_indices)) + face_indexes = buffer_faces(face_indexes, self.topology) + logger.debug("Total faces in mask: %d", len(face_indexes)) # Make a mask dataset - return mask_from_face_indices(face_indices, self.topology) + return mask_from_face_indexes(face_indexes, self.topology) def apply_clip_mask(self, clip_mask: xarray.Dataset, work_dir: Pathish) -> xarray.Dataset: """ @@ -1162,7 +1162,7 @@ def apply_clip_mask(self, clip_mask: xarray.Dataset, work_dir: Pathish) -> xarra See Also -------- :meth:`make_clip_mask` - :func:`mask_from_face_indices` + :func:`mask_from_face_indexes` """ logger.info("Applying clip mask") dataset = self.dataset @@ -1183,7 +1183,7 @@ def apply_clip_mask(self, clip_mask: xarray.Dataset, work_dir: Pathish) -> xarra # This is the fill value used in the mask. new_fill_value = clip_mask.data_vars['new_node_index'].encoding['_FillValue'] - def integer_indices(data_array: xarray.DataArray) -> numpy.ndarray: + def integer_indexes(data_array: xarray.DataArray) -> numpy.ndarray: masked_values = numpy.ma.masked_invalid(data_array.values) # numpy will emit a warning when converting an array with numpy.nan to int, # even if the nans are masked out. 
@@ -1191,41 +1191,41 @@ def integer_indices(data_array: xarray.DataArray) -> numpy.ndarray: masked_integers: numpy.ndarray = masked_values.astype(numpy.int_) return masked_integers - new_node_indices = integer_indices(clip_mask.data_vars['new_node_index']) - new_face_indices = integer_indices(clip_mask.data_vars['new_face_index']) + new_node_indexes = integer_indexes(clip_mask.data_vars['new_node_index']) + new_face_indexes = integer_indexes(clip_mask.data_vars['new_face_index']) has_edges = 'new_edge_index' in clip_mask.data_vars if has_edges: - new_edge_indices = integer_indices(clip_mask.data_vars['new_edge_index']) + new_edge_indexes = integer_indexes(clip_mask.data_vars['new_edge_index']) # Re-index the face_node_connectivity variable topology_variables.append(update_connectivity( topology.face_node_connectivity, topology.face_node_array, - new_face_indices, new_node_indices, + new_face_indexes, new_node_indexes, primary_dimension=topology.face_dimension, fill_value=new_fill_value)) # Re-index each of the optional connectivity variables if has_edges and topology.has_valid_face_edge_connectivity: topology_variables.append(update_connectivity( topology.face_edge_connectivity, topology.face_edge_array, - new_face_indices, new_edge_indices, + new_face_indexes, new_edge_indexes, primary_dimension=topology.edge_dimension, fill_value=new_fill_value)) if topology.has_valid_face_face_connectivity: topology_variables.append(update_connectivity( topology.face_face_connectivity, topology.face_face_array, - new_face_indices, new_face_indices, + new_face_indexes, new_face_indexes, primary_dimension=topology.face_dimension, fill_value=new_fill_value)) if has_edges and topology.has_valid_edge_face_connectivity: topology_variables.append(update_connectivity( topology.edge_face_connectivity, topology.edge_face_array, - new_edge_indices, new_face_indices, + new_edge_indexes, new_face_indexes, primary_dimension=topology.face_dimension, fill_value=new_fill_value)) if has_edges and topology.has_valid_edge_node_connectivity: topology_variables.append(update_connectivity( topology.edge_node_connectivity, topology.edge_node_array, - new_edge_indices, new_node_indices, + new_edge_indexes, new_node_indexes, primary_dimension=topology.edge_dimension, fill_value=new_fill_value)) # Save all the topology variables to one combined dataset @@ -1243,11 +1243,11 @@ def integer_indices(data_array: xarray.DataArray) -> numpy.ndarray: logger.debug("Slicing data variables...") dimension_masks: dict[Hashable, numpy.ndarray] = { - topology.node_dimension: ~numpy.ma.getmask(new_node_indices), - topology.face_dimension: ~numpy.ma.getmask(new_face_indices), + topology.node_dimension: ~numpy.ma.getmask(new_node_indexes), + topology.face_dimension: ~numpy.ma.getmask(new_face_indexes), } if has_edges: - dimension_masks[topology.edge_dimension] = ~numpy.ma.getmask(new_edge_indices) + dimension_masks[topology.edge_dimension] = ~numpy.ma.getmask(new_edge_indexes) mesh_dimensions = set(dimension_masks.keys()) for name, data_array in dataset.data_vars.items(): diff --git a/src/emsarray/masking.py b/src/emsarray/masking.py index 9b0f196..d9db967 100644 --- a/src/emsarray/masking.py +++ b/src/emsarray/masking.py @@ -307,7 +307,7 @@ def smear_mask(arr: numpy.ndarray, pad_axes: list[bool]) -> numpy.ndarray: def blur_mask(arr: numpy.ndarray, size: int = 1) -> numpy.ndarray: """ - Take a boolean numpy array and blur it, such that all indices neighbouring + Take a boolean numpy array and blur it, such that all indexes neighbouring a True 
value in the input array are True in the output array. The output array will have the same shape as the input array. @@ -352,10 +352,10 @@ def blur_mask(arr: numpy.ndarray, size: int = 1) -> numpy.ndarray: # the blurred mask is true if the original mask was true, # or any cells in a `size` sized slice around the original cell. arr_iter = numpy.nditer(arr, ['multi_index']) - indices = (arr_iter.multi_index for _ in arr_iter) + indexes = (arr_iter.multi_index for _ in arr_iter) values = ( arr[index] or numpy.any(padded[tuple(slice(i, i + size * 2 + 1) for i in index)]) - for index in indices + for index in indexes ) arr = numpy.fromiter(values, count=arr.size, dtype=arr.dtype).reshape(arr.shape) diff --git a/src/emsarray/operations/depth.py b/src/emsarray/operations/depth.py index 4473cf3..38d6c8e 100644 --- a/src/emsarray/operations/depth.py +++ b/src/emsarray/operations/depth.py @@ -144,8 +144,8 @@ def ocean_floor( data_array = dataset.data_vars[variable_names[0]].isel( {name: 0 for name in non_spatial_dimensions}, drop=True, missing_dims='ignore') - # Then find the ocean floor indices. - ocean_floor_indices = _find_ocean_floor_indices( + # Then find the ocean floor indexes. + ocean_floor_indexes = _find_ocean_floor_indexes( data_array, depth_dimension) # Extract just the variables with these spatial coordinates @@ -159,9 +159,9 @@ def ocean_floor( if coordinate.dims == (depth_dimension,) ]) - # Find the ocean floor using the ocean_floor_indices + # Find the ocean floor using the ocean_floor_indexes dataset_subset = dataset_subset.isel( - {depth_dimension: ocean_floor_indices}, + {depth_dimension: ocean_floor_indexes}, drop=True, missing_dims='ignore') # Merge these floored variables back in to the dataset @@ -176,7 +176,7 @@ def ocean_floor( return dataset -def _find_ocean_floor_indices( +def _find_ocean_floor_indexes( data_array: xarray.DataArray, depth_dimension: Hashable, ) -> xarray.DataArray: @@ -191,9 +191,9 @@ def _find_ocean_floor_indices( # # Columns of all nans will have an argmax index of 0. # Item 0 in the column will be nan, resulting in nan in the output as desired. - depth_indices = (data_array * 0 + 1).cumsum(str(depth_dimension)) - max_depth_indices = depth_indices.argmax(str(depth_dimension)) - return cast(xarray.DataArray, max_depth_indices) + depth_indexes = (data_array * 0 + 1).cumsum(str(depth_dimension)) + max_depth_indexes = depth_indexes.argmax(str(depth_dimension)) + return cast(xarray.DataArray, max_depth_indexes) def normalize_depth_variables( @@ -225,8 +225,8 @@ def normalize_depth_variables( If False, negative values indicate depth below the surface. If None, this attribute of the depth coordinate is left unmodified. deep_to_shallow : bool, optional - If True, the layers are ordered such that deeper layers have lower indices. - If False, the layers are ordered such that deeper layers have higher indices. + If True, the layers are ordered such that deeper layers have lower indexes. + If False, the layers are ordered such that deeper layers have higher indexes. If None, this attribute of the depth coordinate is left unmodified. 
Returns diff --git a/src/emsarray/operations/point_extraction.py b/src/emsarray/operations/point_extraction.py index 58e24ba..f183f20 100644 --- a/src/emsarray/operations/point_extraction.py +++ b/src/emsarray/operations/point_extraction.py @@ -23,6 +23,7 @@ import xarray import xarray.core.dtypes as xrdtypes +from emsarray import utils from emsarray.conventions import Convention @@ -32,8 +33,8 @@ class NonIntersectingPoints(ValueError): Raised when a point to extract does not intersect the dataset geometry. """ - #: The indices of the points that do not intersect - indices: numpy.ndarray + #: The indexes of the points that do not intersect + indexes: numpy.ndarray #: The non-intersecting points points: list[shapely.Point] @@ -41,6 +42,14 @@ class NonIntersectingPoints(ValueError): def __post_init__(self) -> None: super().__init__(f"{self.points[0].wkt} does not intersect the dataset geometry") + @property + @utils.deprecated( + "NonIntersectingPoints.indices has been renamed to NonIntersectingPoints.indexes", + DeprecationWarning, + ) + def indices(self) -> numpy.ndarray: + return self.indexes + def _dataframe_to_dataset( dataframe: pandas.DataFrame, @@ -90,7 +99,7 @@ def extract_points( A subset of the input dataset that only contains data at the given points. The dataset will only contain the values, without any geometry coordinates. The `point_dimension` dimension will have a coordinate with the same name - whose values match the indices of the `points` array. + whose values match the indexes of the `points` array. This is useful when `errors` is 'drop' to find out which points were dropped. See Also @@ -106,10 +115,11 @@ def extract_points( out_of_bounds = numpy.flatnonzero(numpy.equal(indexes, None)) # type: ignore if len(out_of_bounds): raise NonIntersectingPoints( - indices=out_of_bounds, + indexes=out_of_bounds, points=[points[i] for i in out_of_bounds]) # Make a DataFrame out of all point indexers + selector = convention.selector_for_indexes([i.index for i in indexes]) selector_df = pandas.DataFrame([ convention.selector_for_index(index.index) for index in indexes diff --git a/src/emsarray/operations/triangulate.py b/src/emsarray/operations/triangulate.py index d13c0d3..52ac842 100644 --- a/src/emsarray/operations/triangulate.py +++ b/src/emsarray/operations/triangulate.py @@ -27,16 +27,16 @@ def triangulate_dataset( Returns ------- - tuple of vertices, triangles, and cell indices. + tuple of vertices, triangles, and cell indexes. A tuple of three lists is returned, - containing vertices, triangles, and cell indices respectively. + containing vertices, triangles, and cell indexes respectively. Each vertex is a tuple of (x, y) or (lon, lat) coordinates. Each triangle is a tuple of three integers, indicating which vertices make up the triangle. - The cell indices tie the triangles to the original cell polygon, + The cell indexes tie the triangles to the original cell polygon, allowing you to plot data on the triangle mesh. 
Examples @@ -54,7 +54,7 @@ def triangulate_dataset( # Triangulate the dataset dataset = emsarray.tutorial.open_dataset("austen") - vertices, triangles, cell_indices = triangulate_dataset(dataset) + vertices, triangles, cell_indexes = triangulate_dataset(dataset) # This takes a while to render mesh = hv.TriMesh((triangles, vertices)) @@ -70,7 +70,7 @@ def triangulate_dataset( from emsarray.operations import triangulate_dataset dataset = emsarray.tutorial.open_dataset("gbr4") - vertices, triangles, cell_indices = triangulate_dataset(dataset) + vertices, triangles, cell_indexes = triangulate_dataset(dataset) # Trimesh expects 3D vertices. vertices = numpy.c_[vertices, numpy.zeros(len(vertices))] mesh = trimesh.Trimesh(vertices=vertices, faces=triangles) @@ -78,7 +78,7 @@ def triangulate_dataset( depth = 1 - (dataset.data_vars["Mesh2_depth"].values / -200) depth_colour = numpy.c_[depth, depth, depth, numpy.ones_like(depth)] * 255 - mesh.visual.face_colors = depth_colour[cell_indices] + mesh.visual.face_colors = depth_colour[cell_indexes] mesh.show() .. _holoviews: https://holoviews.org/reference/elements/bokeh/TriMesh.html @@ -100,7 +100,7 @@ def triangulate_dataset( # Vertex positions are (probably) floats. For grid datasets, where cells # are implicitly defined by their centres, be careful to compute cell # vertices in consistent ways. Float equality is tricky! - vertex_indices = {vertex: index for index, vertex in enumerate(vertices)} + vertex_indexes = {vertex: index for index, vertex in enumerate(vertices)} # Each cell polygon needs to be triangulated, # while also recording the convention native index of the cell, @@ -110,14 +110,14 @@ def triangulate_dataset( for index, polygon in enumerate(polygons) if polygon is not None] triangles_with_index = list( - (tuple(vertex_indices[vertex] for vertex in triangle_coords), dataset_index) + (tuple(vertex_indexes[vertex] for vertex in triangle_coords), dataset_index) for polygon, dataset_index in polygons_with_index for triangle_coords in _triangulate_polygon(polygon) ) triangles: list[Triangle] = [tri for tri, index in triangles_with_index] # type: ignore - indices = [index for tri, index in triangles_with_index] + indexes = [index for tri, index in triangles_with_index] - return (vertices, triangles, indices) + return (vertices, triangles, indexes) def _triangulate_polygon(polygon: Polygon) -> list[tuple[Vertex, Vertex, Vertex]]: diff --git a/src/emsarray/transect.py b/src/emsarray/transect.py index 9bba29b..58e39d2 100644 --- a/src/emsarray/transect.py +++ b/src/emsarray/transect.py @@ -169,7 +169,7 @@ def transect_dataset(self) -> xarray.Dataset: f'Depth variable {depth.name!r} must have a `positive` attribute' ) from err - linear_indices = [segment.linear_index for segment in self.segments] + linear_indexes = [segment.linear_index for segment in self.segments] depth = xarray.DataArray( data=depth.values, dims=(depth_dimension,), @@ -206,7 +206,7 @@ def transect_dataset(self) -> xarray.Dataset: }, ) linear_index = xarray.DataArray( - data=linear_indices, + data=linear_indexes, dims=('index',) ) @@ -289,9 +289,9 @@ def segments(self) -> list[TransectSegment[Index]]: segments = [] # Find all the cell polygons that intersect the line - intersecting_indices = self.convention.strtree.query(self.line, predicate='intersects') + intersecting_indexes = self.convention.strtree.query(self.line, predicate='intersects') - for linear_index in intersecting_indices: + for linear_index in intersecting_indexes: polygon = self.convention.polygons[linear_index] 
index = self.convention.wind_index(linear_index) for intersection in self._intersect_polygon(polygon): @@ -470,14 +470,14 @@ def make_ocean_floor_poly_collection( deepest = deepest_fn(bathymetry_values.values) distance_bounds = transect_dataset['distance_bounds'].values - linear_indices = transect_dataset['linear_index'].values + linear_indexes = transect_dataset['linear_index'].values vertices = [ [ - (distance_bounds[index, 0], bathymetry_values[linear_indices[index]]), + (distance_bounds[index, 0], bathymetry_values[linear_indexes[index]]), (distance_bounds[index, 0], deepest), (distance_bounds[index, 1], deepest), - (distance_bounds[index, 1], bathymetry_values[linear_indices[index]]), + (distance_bounds[index, 1], bathymetry_values[linear_indexes[index]]), ] for index in range(transect_dataset.sizes['index']) ] @@ -508,8 +508,8 @@ def prepare_data_array_for_transect(self, data_array: xarray.DataArray) -> xarra index_dimension = data_array.dims[-1] data_array = move_dimensions_to_end(data_array, [depth_dimension, index_dimension]) - linear_indices = self.transect_dataset['linear_index'].values - data_array = data_array.isel({index_dimension: linear_indices}) + linear_indexes = self.transect_dataset['linear_index'].values + data_array = data_array.isel({index_dimension: linear_indexes}) # Restore attrs after reformatting data_array.attrs.update(attrs) @@ -528,7 +528,7 @@ def _find_depth_bounds(self, data_array: xarray.DataArray) -> tuple[int, int]: but the dataset includes very deep layers the shallow regions become very small on the final plot. - This function finds the indices of the deepest and shallowest layers + This function finds the indexes of the deepest and shallowest layers where the values are not entirely nan along the transect path. 
The transect plot can use these to only plot depth values that have data, diff --git a/src/emsarray/utils.py b/src/emsarray/utils.py index 12c05a1..e44b8e9 100644 --- a/src/emsarray/utils.py +++ b/src/emsarray/utils.py @@ -730,10 +730,10 @@ def make_polygons_with_holes( if out is None: out = numpy.full(points.shape[0], None, dtype=numpy.object_) - complete_row_indices = numpy.flatnonzero(numpy.isfinite(points).all(axis=(1, 2))) + complete_row_indexes = numpy.flatnonzero(numpy.isfinite(points).all(axis=(1, 2))) shapely.polygons( - points[complete_row_indices], - indices=complete_row_indices, + points[complete_row_indexes], + indices=complete_row_indexes, out=out) return out diff --git a/tests/conventions/test_base.py b/tests/conventions/test_base.py index 489cb4a..a259982 100644 --- a/tests/conventions/test_base.py +++ b/tests/conventions/test_base.py @@ -68,8 +68,8 @@ def wind_index( y, x = map(int, numpy.unravel_index(index, self.shape)) return SimpleGridIndex(y, x) - def ravel_index(self, indices: SimpleGridIndex) -> int: - return int(numpy.ravel_multi_index((indices.y, indices.x), self.shape)) + def ravel_index(self, indexes: SimpleGridIndex) -> int: + return int(numpy.ravel_multi_index((indexes.y, indexes.x), self.shape)) def selector_for_index(self, index: SimpleGridIndex) -> dict[Hashable, int]: return {'x': index.x, 'y': index.y} diff --git a/tests/conventions/test_ugrid.py b/tests/conventions/test_ugrid.py index 7313841..10a4ea3 100644 --- a/tests/conventions/test_ugrid.py +++ b/tests/conventions/test_ugrid.py @@ -14,7 +14,7 @@ from emsarray.conventions import get_dataset_convention from emsarray.conventions.ugrid import ( Mesh2DTopology, NoEdgeDimensionException, UGrid, UGridKind, - _get_start_index, buffer_faces, mask_from_face_indices + _get_start_index, buffer_faces, mask_from_face_indexes ) from emsarray.exceptions import ( ConventionViolationError, ConventionViolationWarning @@ -590,7 +590,7 @@ def test_plot_on_figure(): @pytest.mark.parametrize( - ["face_indices", "expected"], + ["face_indexes", "expected"], # It really helps to draw a picture and number the faces to understand # what is going on here. # There are 25 triangles in a stack, and 50 squares in a grid. 
@@ -619,43 +619,43 @@ def test_plot_on_figure(): (numpy.arange(75), numpy.arange(75)) ], ) -def test_buffer_faces(face_indices, expected): +def test_buffer_faces(face_indexes, expected): dataset = make_dataset(width=5) topology = Mesh2DTopology(dataset) - assert_equal(buffer_faces(numpy.array(face_indices, dtype=int), topology), expected) + assert_equal(buffer_faces(numpy.array(face_indexes, dtype=int), topology), expected) -def test_mask_from_face_indices_without_edges(): +def test_mask_from_face_indexes_without_edges(): dataset = make_dataset(width=5, make_edges=False) topology = Mesh2DTopology(dataset) - face_indices = [20, 21, 22, 23, 24, 27, 28, 29] - node_indices = [12, 13, 14, 17, 18, 19, 20, 23, 24, 25, 26] + face_indexes = [20, 21, 22, 23, 24, 27, 28, 29] + node_indexes = [12, 13, 14, 17, 18, 19, 20, 23, 24, 25, 26] - mask = mask_from_face_indices(numpy.array(face_indices), topology) + mask = mask_from_face_indexes(numpy.array(face_indexes), topology) assert mask.sizes == { 'old_node_index': topology.node_count, 'old_face_index': topology.face_count, } expected_face = numpy.full(topology.face_count, fill_value=numpy.nan) - expected_face[face_indices] = numpy.arange(len(face_indices)) + expected_face[face_indexes] = numpy.arange(len(face_indexes)) assert_equal(expected_face, mask.data_vars['new_face_index'].values) expected_node = numpy.full(topology.node_count, fill_value=numpy.nan) - expected_node[node_indices] = numpy.arange(len(node_indices)) + expected_node[node_indexes] = numpy.arange(len(node_indexes)) assert_equal(expected_node, mask.data_vars['new_node_index'].values) -def test_mask_from_face_indices_with_edges(): +def test_mask_from_face_indexes_with_edges(): dataset = make_dataset(width=5, make_edges=True) topology = Mesh2DTopology(dataset) - face_indices = [20, 21, 22, 23, 24, 27, 28, 29] - edge_indices = [25, 28, 36, 37, 38, 39, 40, 41, 42, 43, 44, 49, 50, 51, 52, 53, 54, 55] - node_indices = [12, 13, 14, 17, 18, 19, 20, 23, 24, 25, 26] + face_indexes = [20, 21, 22, 23, 24, 27, 28, 29] + edge_indexes = [25, 28, 36, 37, 38, 39, 40, 41, 42, 43, 44, 49, 50, 51, 52, 53, 54, 55] + node_indexes = [12, 13, 14, 17, 18, 19, 20, 23, 24, 25, 26] - mask = mask_from_face_indices(numpy.array(face_indices), topology) + mask = mask_from_face_indexes(numpy.array(face_indexes), topology) assert mask.sizes == { 'old_node_index': topology.node_count, 'old_edge_index': topology.edge_count, @@ -663,15 +663,15 @@ def test_mask_from_face_indices_with_edges(): } expected_face = numpy.full(topology.face_count, fill_value=numpy.nan) - expected_face[face_indices] = numpy.arange(len(face_indices)) + expected_face[face_indexes] = numpy.arange(len(face_indexes)) assert_equal(expected_face, mask.data_vars['new_face_index'].values) expected_edge = numpy.full(topology.edge_count, fill_value=numpy.nan) - expected_edge[edge_indices] = numpy.arange(len(edge_indices)) + expected_edge[edge_indexes] = numpy.arange(len(edge_indexes)) assert_equal(expected_edge, mask.data_vars['new_edge_index'].values) expected_node = numpy.full(topology.node_count, fill_value=numpy.nan) - expected_node[node_indices] = numpy.arange(len(node_indices)) + expected_node[node_indexes] = numpy.arange(len(node_indexes)) assert_equal(expected_node, mask.data_vars['new_node_index'].values) @@ -680,12 +680,12 @@ def test_apply_clip_mask(tmp_path): topology = Mesh2DTopology(dataset) # Sketch this out and number the faces, edges, and nodes, if you want to verify - face_indices = [20, 21, 22, 23, 24, 27, 28, 29] - edge_indices = [25, 28, 
36, 37, 38, 39, 40, 41, 42, 43, 44, 49, 50, 51, 52, 53, 54, 55] - node_indices = [12, 13, 14, 17, 18, 19, 20, 23, 24, 25, 26] + face_indexes = [20, 21, 22, 23, 24, 27, 28, 29] + edge_indexes = [25, 28, 36, 37, 38, 39, 40, 41, 42, 43, 44, 49, 50, 51, 52, 53, 54, 55] + node_indexes = [12, 13, 14, 17, 18, 19, 20, 23, 24, 25, 26] # Clip it! - mask = mask_from_face_indices(numpy.array(face_indices), topology) + mask = mask_from_face_indexes(numpy.array(face_indexes), topology) clipped = dataset.ems.apply_clip_mask(mask, tmp_path) assert isinstance(clipped.ems, UGrid) @@ -695,19 +695,19 @@ def test_apply_clip_mask(tmp_path): assert set(dataset.dims) == set(clipped.dims) # Check that the new topology seems reasonable - assert clipped.ems.topology.face_count == len(face_indices) - assert clipped.ems.topology.edge_count == len(edge_indices) - assert clipped.ems.topology.node_count == len(node_indices) + assert clipped.ems.topology.face_count == len(face_indexes) + assert clipped.ems.topology.edge_count == len(edge_indexes) + assert clipped.ems.topology.node_count == len(node_indexes) # Check that the data were preserved, beyond being clipped - assert_equal(clipped.data_vars['Mesh2_depth'].values, dataset.data_vars['Mesh2_depth'].values[face_indices]) - assert_equal(clipped.data_vars['eta'].values, dataset.data_vars['eta'].values[:, face_indices]) - assert_equal(clipped.data_vars['temp'].values, dataset.data_vars['temp'].values[:, :, face_indices]) - assert_equal(clipped.data_vars['u1'].values, dataset.data_vars['u1'].values[:, :, edge_indices]) + assert_equal(clipped.data_vars['Mesh2_depth'].values, dataset.data_vars['Mesh2_depth'].values[face_indexes]) + assert_equal(clipped.data_vars['eta'].values, dataset.data_vars['eta'].values[:, face_indexes]) + assert_equal(clipped.data_vars['temp'].values, dataset.data_vars['temp'].values[:, :, face_indexes]) + assert_equal(clipped.data_vars['u1'].values, dataset.data_vars['u1'].values[:, :, edge_indexes]) # Check that the new geometry matches the relevant polygons in the old geometry - assert len(clipped.ems.polygons) == len(face_indices) - original_polys = [dataset.ems.polygons[index] for index in face_indices] + assert len(clipped.ems.polygons) == len(face_indexes) + original_polys = [dataset.ems.polygons[index] for index in face_indexes] for original_poly, clipped_poly in zip(original_polys, clipped.ems.polygons): assert original_poly.equals_exact(clipped_poly, 1e-6) @@ -750,12 +750,12 @@ def test_make_and_apply_clip_mask(tmp_path): # Intersecting the clip polygon and the dataset geometry should have # selected some faces. Open the geojson files that are generated in qgis # and inspect the index attributes. This list is built from that. 
- face_indices = [6, 7, 8, 11, 12, 13, 14, 15, 19, 20, 21, 22, 23, 24, 27, 28, 29, 32, 33, 34] + face_indexes = [6, 7, 8, 11, 12, 13, 14, 15, 19, 20, 21, 22, 23, 24, 27, 28, 29, 32, 33, 34] fill_value = topology.sensible_fill_value - new_face_indices = numpy.ma.masked_array( + new_face_indexes = numpy.ma.masked_array( numpy.full(topology.face_count, fill_value, dtype=numpy.float64), mask=True) - new_face_indices[face_indices] = numpy.arange(len(face_indices)) - assert_equal(clip_mask.data_vars['new_face_index'].values, new_face_indices) + new_face_indexes[face_indexes] = numpy.arange(len(face_indexes)) + assert_equal(clip_mask.data_vars['new_face_index'].values, new_face_indexes) # Apply the clip mask work_dir = tmp_path / 'work_dir' @@ -813,20 +813,20 @@ def test_derive_connectivity(): (6, 9): (6, fv), (9, 10): (6, fv), (7, 10): (6, 7), (10, 11): (7, fv), (8, 11): (7, fv), } actual_edge_face = { - tuple(sorted(edge_node[edge_index])): tuple(sorted(face_indices)) - for edge_index, face_indices in enumerate(edge_face) + tuple(sorted(edge_node[edge_index])): tuple(sorted(face_indexes)) + for edge_index, face_indexes in enumerate(edge_face) } assert expected_edge_face == actual_edge_face # face_edge is a little tedious. It helps if we build up a mapping of # node_pair: edge_index face_edge = topology.face_edge_array - edge_indices = { - tuple(sorted(node_indices)): edge_index - for edge_index, node_indices in enumerate(edge_node.tolist()) + edge_indexes = { + tuple(sorted(node_indexes)): edge_index + for edge_index, node_indexes in enumerate(edge_node.tolist()) } expected_face_edge = [ - sorted(edge_indices[pair] for pair in row) + sorted(edge_indexes[pair] for pair in row) for row in [ [(0, 1), (1, 2), (0, 2)], [(1, 3), (3, 4), (1, 4)], @@ -855,8 +855,8 @@ def test_derive_connectivity(): [5, 6], ] actual_face_face = [ - sorted(face_indices.compressed()) - for face_indices in face_face + sorted(face_indexes.compressed()) + for face_indexes in face_face ] assert expected_face_face == actual_face_face diff --git a/tests/datasets/make_examples.py b/tests/datasets/make_examples.py index e415888..e5208f0 100644 --- a/tests/datasets/make_examples.py +++ b/tests/datasets/make_examples.py @@ -191,9 +191,9 @@ def make_ugrid_mesh2d(out: pathlib.Path) -> None: for p in polygon.exterior.coords })) # A map between {point: index} - node_indices = {tuple(p): i for i, p in enumerate(nodes)} + node_indexes = {tuple(p): i for i, p in enumerate(nodes)} # Number of vertices - nnodes = len(node_indices) + nnodes = len(node_indexes) # Maximum vertex count for any face max_vertex_count = max(len(polygon.exterior.coords) for polygon in faces) - 1 # A sensible fill value of all nines @@ -221,7 +221,7 @@ def make_ugrid_mesh2d(out: pathlib.Path) -> None: fill_value=fill_value) for iface, polygon in enumerate(faces): coords = polygon.exterior.coords - face_node = [node_indices[p] for p in coords[:-1]] + face_node = [node_indexes[p] for p in coords[:-1]] face_node_connectivity[iface, :len(face_node)] = face_node values = dataset.createVariable("values", "i4", ["face"]) diff --git a/tests/masking/test_mask_dataset.py b/tests/masking/test_mask_dataset.py index 5fa33c8..62b9eac 100644 --- a/tests/masking/test_mask_dataset.py +++ b/tests/masking/test_mask_dataset.py @@ -83,7 +83,7 @@ def test_mask_dataset(tmp_path: pathlib.Path): # * Different coordinate sets are masked using the appropriate mask # The input dataset is a 6x5 grid, and the mask defines a 4x3 area in the - # centre with the (2,2), (3,1), and (3,2) indices not 
included. The input + # centre with the (2,2), (3,1), and (3,2) indexes not included. The input # dataset is nominally a SHOC standard dataset. centres = mask_from_strings(["00000", "01110", "01110", "01100", "01000", "00000"]) mask = c_mask_from_centres(centres, { diff --git a/tests/operations/point_extraction/test_extract_dataframe.py b/tests/operations/point_extraction/test_extract_dataframe.py index 6f45dd8..a072fcc 100644 --- a/tests/operations/point_extraction/test_extract_dataframe.py +++ b/tests/operations/point_extraction/test_extract_dataframe.py @@ -87,7 +87,7 @@ def test_extract_points_missing_point_error( with pytest.raises(point_extraction.NonIntersectingPoints) as exc_info: point_extraction.extract_dataframe(in_dataset, points_df, ('lon', 'lat')) exc: point_extraction.NonIntersectingPoints = exc_info.value - assert_equal(exc.indices, [1, 3]) + assert_equal(exc.indexes, [1, 3]) def test_extract_points_missing_point_drop( diff --git a/tests/operations/point_extraction/test_extract_points.py b/tests/operations/point_extraction/test_extract_points.py index cc3de16..2c0973e 100644 --- a/tests/operations/point_extraction/test_extract_points.py +++ b/tests/operations/point_extraction/test_extract_points.py @@ -57,7 +57,7 @@ def test_extract_points_missing_point_error( with pytest.raises(point_extraction.NonIntersectingPoints) as exc_info: point_extraction.extract_points(in_dataset, points) exc: point_extraction.NonIntersectingPoints = exc_info.value - assert_equal(exc.indices, [4, 5, 6, 7]) + assert_equal(exc.indexes, [4, 5, 6, 7]) def test_extract_points_missing_point_drop( diff --git a/tests/operations/triangulate/test_triangulate_dataset.py b/tests/operations/triangulate/test_triangulate_dataset.py index 16fb5c7..50cbdf9 100644 --- a/tests/operations/triangulate/test_triangulate_dataset.py +++ b/tests/operations/triangulate/test_triangulate_dataset.py @@ -17,7 +17,7 @@ def test_triangulate_dataset_cfgrid1d(datasets): dataset = emsarray.open_dataset(datasets / 'cfgrid1d.nc') topology = dataset.ems.topology dataset.ems.polygons - vertices, triangles, cell_indices = triangulate_dataset(dataset) + vertices, triangles, cell_indexes = triangulate_dataset(dataset) # A vertex at the intersection of every cell assert len(vertices) == (topology.latitude.size + 1) * (topology.longitude.size + 1) @@ -25,15 +25,15 @@ def test_triangulate_dataset_cfgrid1d(datasets): # Two triangles per cell. 
assert len(triangles) == 2 * topology.latitude.size * topology.longitude.size - assert min(cell_indices) == 0 - assert max(cell_indices) == topology.latitude.size * topology.longitude.size - 1 + assert min(cell_indexes) == 0 + assert max(cell_indexes) == topology.latitude.size * topology.longitude.size - 1 - check_triangulation(dataset, vertices, triangles, cell_indices) + check_triangulation(dataset, vertices, triangles, cell_indexes) def test_triangulate_dataset_cfgrid2d(datasets): dataset = emsarray.open_dataset(datasets / "cfgrid2d.nc") - vertices, triangles, cell_indices = triangulate_dataset(dataset) + vertices, triangles, cell_indexes = triangulate_dataset(dataset) topology = dataset.ems.topology # There is a hole in one corner, taking out 6 vertices from the expected count @@ -46,12 +46,12 @@ def test_triangulate_dataset_cfgrid2d(datasets): only_polygons = dataset.ems.polygons[dataset.ems.mask] assert len(triangles) == 2 * len(only_polygons) - check_triangulation(dataset, vertices, triangles, cell_indices) + check_triangulation(dataset, vertices, triangles, cell_indexes) def test_triangulate_dataset_shoc_standard(datasets): dataset = emsarray.open_dataset(datasets / 'shoc_standard.nc') - vertices, triangles, cell_indices = triangulate_dataset(dataset) + vertices, triangles, cell_indexes = triangulate_dataset(dataset) # There is no good way of calculating the number of vertices, as the # geometry is quite complicated in shoc datasets with wet cells @@ -60,13 +60,13 @@ def test_triangulate_dataset_shoc_standard(datasets): only_polygons = dataset.ems.polygons[dataset.ems.mask] assert len(triangles) == 2 * len(only_polygons) - check_triangulation(dataset, vertices, triangles, cell_indices) + check_triangulation(dataset, vertices, triangles, cell_indexes) def test_triangulate_dataset_ugrid(datasets): dataset = emsarray.open_dataset(datasets / "ugrid_mesh2d.nc") topology = dataset.ems.topology - vertices, triangles, cell_indices = triangulate_dataset(dataset) + vertices, triangles, cell_indexes = triangulate_dataset(dataset) # The vertices should be identical to the ugrid nodes assert len(vertices) == topology.node_count @@ -80,28 +80,28 @@ def test_triangulate_dataset_ugrid(datasets): ) assert len(triangles) == expected_triangle_count - check_triangulation(dataset, vertices, triangles, cell_indices) + check_triangulation(dataset, vertices, triangles, cell_indexes) def check_triangulation( dataset: xarray.Dataset, vertices: list[tuple[float, float]], triangles: list[tuple[int, int, int]], - cell_indices: list[int], + cell_indexes: list[int], ): """ Check the triangulation of a dataset by reconstructing all polygons. These checks are independent of the specific convention. """ - # Check that the cell indices are within bounds. - assert len(cell_indices) == len(triangles) - assert min(cell_indices) >= 0 - assert max(cell_indices) <= len(dataset.ems.polygons) + # Check that the cell indexes are within bounds. 
+    assert len(cell_indexes) == len(triangles)
+    assert min(cell_indexes) >= 0
+    assert max(cell_indexes) < len(dataset.ems.polygons)
 
     # For each cell in the dataset, reconstruct its polygon from the triangles
     # and check that it matches
     cell_triangles = defaultdict(list)
-    for triangle, cell_index in zip(triangles, cell_indices):
+    for triangle, cell_index in zip(triangles, cell_indexes):
         cell_triangles[cell_index].append(triangle)
 
     for index, polygon in enumerate(dataset.ems.polygons):

From 43fd43d97ca97c245a71249f9b7d646c0e9fb7a9 Mon Sep 17 00:00:00 2001
From: Tim Heap
Date: Wed, 10 Jul 2024 13:34:31 +1000
Subject: [PATCH 2/5] Add function `emsarray.utils.find_unused_dimension()`

---
 src/emsarray/utils.py | 36 +++++++++++++++++++++----
 tests/test_utils.py   | 63 ++++++++++++++++++++++++++++++++++---------
 2 files changed, 81 insertions(+), 18 deletions(-)

diff --git a/src/emsarray/utils.py b/src/emsarray/utils.py
index e44b8e9..6e54c02 100644
--- a/src/emsarray/utils.py
+++ b/src/emsarray/utils.py
@@ -519,6 +519,36 @@ def move_dimensions_to_end(
     return data_array.transpose(*new_order)
 
 
+def find_unused_dimension(
+    dataset_or_data_array: xarray.Dataset | xarray.DataArray,
+    prefix: str = 'index',
+) -> str:
+    """
+    Find an unused dimension name in a :class:`xarray.Dataset` or :class:`xarray.DataArray`.
+    Useful when transforming datasets in a way that creates a new dimension.
+
+    Parameters
+    ----------
+    dataset_or_data_array : xarray.Dataset or xarray.DataArray
+        A dataset or data array
+    prefix : str, optional
+        The preferred name for the new dimension. If a dimension with this
+        name already exists, `prefix_0` is checked, then `prefix_1`, `prefix_2`, etc.
+
+    Returns
+    -------
+    str
+        A dimension name that does not exist in the dataset or data array passed in.
+    """
+    existing_dims = set(dataset_or_data_array.dims)
+    if prefix not in existing_dims:
+        return prefix
+    candidates = (f'{prefix}_{suffix}' for suffix in itertools.count(start=0))
+    return next(
+        candidate for candidate in candidates
+        if candidate not in existing_dims)
+
+
 def ravel_dimensions(
     data_array: xarray.DataArray,
     dimensions: list[Hashable],
@@ -573,11 +603,7 @@ def ravel_dimensions(
     existing_dims = data_array.dims[:-len(dimensions)]
 
     if linear_dimension is None:
-        suffix = 0
-        linear_dimension = 'index'
-        while linear_dimension in existing_dims:
-            linear_dimension = f'index_{suffix}'
-            suffix += 1
+        linear_dimension = find_unused_dimension(data_array, 'index')
 
     new_dims = existing_dims + (linear_dimension,)
     coords = {
diff --git a/tests/test_utils.py b/tests/test_utils.py
index aa27b2d..f6a6d3d 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -433,7 +433,41 @@ def test_move_dimensions_to_end_missing():
     assert str(exc.value) == message
 
 
-def test_linearize_dimensions_exact_dimensions():
+def test_find_unused_dimension():
+    data_array = xarray.DataArray(
+        data=numpy.random.random((3, 5)),
+        dims=['y', 'x'],
+    )
+    assert utils.find_unused_dimension(data_array) == 'index'
+
+
+def test_find_unused_dimension_prefix():
+    data_array = xarray.DataArray(
+        data=numpy.random.random((3, 5)),
+        dims=['y', 'x'],
+    )
+    assert utils.find_unused_dimension(data_array, 'foo') == 'foo'
+
+
+@pytest.mark.parametrize(
+    ['dims', 'prefix', 'expected'],
+    [
+        (['index'], 'index', 'index_0'),
+        (['index', 'index_0'], 'index', 'index_1'),
+        (['index', 'index_0', 'index_1'], 'index', 'index_2'),
+        (['index', 'index_1'], 'index', 'index_0'),
+        (['index'], 'dim', 'dim'),
+    ],
+)
+def test_find_unused_dimension_conflict(dims: list[str], prefix: str,
expected: str): + data_array = xarray.DataArray( + data=numpy.random.random([2] * len(dims)), + dims=dims, + ) + assert utils.find_unused_dimension(data_array, prefix) == expected + + +def test_ravel_dimensions_exact_dimensions(): data_array = xarray.DataArray( data=numpy.random.random((3, 5)), dims=['y', 'x'], @@ -442,11 +476,11 @@ def test_linearize_dimensions_exact_dimensions(): data=data_array.values.ravel(), dims=['index'], ) - linearized = utils.ravel_dimensions(data_array, ['y', 'x']) - xarray.testing.assert_equal(linearized, expected) + ravelled = utils.ravel_dimensions(data_array, ['y', 'x']) + xarray.testing.assert_equal(ravelled, expected) -def test_linearize_dimensions_extra_dimensions(): +def test_ravel_dimensions_extra_dimensions(): data_array = xarray.DataArray( data=numpy.random.random((2, 7, 5, 3)), dims=['t', 'x', 'y', 'z'], @@ -465,11 +499,11 @@ def test_linearize_dimensions_extra_dimensions(): 'z': data_array.coords['z'], }, ) - linearized = utils.ravel_dimensions(data_array, ['y', 'x']) - xarray.testing.assert_equal(linearized, expected) + ravelled = utils.ravel_dimensions(data_array, ['y', 'x']) + xarray.testing.assert_equal(ravelled, expected) -def test_linearize_dimensions_custom_name(): +def test_ravel_dimensions_custom_name(): data_array = xarray.DataArray( data=numpy.random.random((2, 7, 5, 3)), dims=['t', 'x', 'y', 'z'], @@ -478,21 +512,24 @@ def test_linearize_dimensions_custom_name(): data=numpy.reshape(numpy.transpose(data_array.values, (0, 3, 2, 1)), (2, 3, -1)), dims=['t', 'z', 'i'], ) - linearized = utils.ravel_dimensions(data_array, ['y', 'x'], linear_dimension='i') - xarray.testing.assert_equal(linearized, expected) + ravelled = utils.ravel_dimensions(data_array, ['y', 'x'], linear_dimension='i') + xarray.testing.assert_equal(ravelled, expected) -def test_linearize_dimensions_auto_name_conflict(): +def test_ravel_dimensions_auto_name_conflict(): data_array = xarray.DataArray( data=numpy.random.random((2, 7, 5, 3)), dims=['index', 'index_0', 'index_1', 'index_2'], ) + # The new dimension will be index_3, as it is the first unused dimension found. + # The returned array will not have index_1 or index_2 after they have been ravelled. + # This does leave a gap in the numbering. expected = xarray.DataArray( data=numpy.reshape(numpy.transpose(data_array.values, (0, 1, 3, 2)), (2, 7, -1)), - dims=['index', 'index_0', 'index_1'], + dims=['index', 'index_0', 'index_3'], ) - linearized = utils.ravel_dimensions(data_array, ['index_2', 'index_1']) - xarray.testing.assert_equal(linearized, expected) + ravelled = utils.ravel_dimensions(data_array, ['index_2', 'index_1']) + xarray.testing.assert_equal(ravelled, expected) def test_wind_dimension(): From 8e164246dfb75794163bf3092ec853d9a26b8263 Mon Sep 17 00:00:00 2001 From: Tim Heap Date: Wed, 10 Jul 2024 13:35:14 +1000 Subject: [PATCH 3/5] Add `Convention.select_indexes()` and related functions This is a breaking change for plugins as it changes the abstract methods on the Convention class. `Convention.selector_for_indexes()` is a new required method, and `Convention.selector_for_index()` now has a default implementation which calls the new method. 
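For plugin authors the migration is mostly mechanical: implement
`Convention.selector_for_indexes()` and the single-index method comes for
free. As a rough usage sketch of the new batch selection (the file name,
dimension name and index values below are hypothetical; any SHOC standard
dataset would behave the same way):

    import emsarray
    from emsarray.conventions.arakawa_c import ArakawaCGridKind

    dataset = emsarray.open_dataset("shoc_standard.nc")

    # Select two face cells in one call, rather than calling
    # select_index() once per cell and stitching the results together.
    faces = dataset.ems.select_indexes(
        [(ArakawaCGridKind.face, 3, 4), (ArakawaCGridKind.face, 3, 5)],
        index_dimension="face_index",
    )

    # Both cells are stacked along the new 'face_index' dimension;
    # geometry variables are dropped by default (drop_geometry=True).
    assert faces.sizes["face_index"] == 2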
---
 src/emsarray/conventions/_base.py           | 152 ++++++++++++++++--
 src/emsarray/operations/point_extraction.py |  27 ++--
 tests/conventions/test_base.py              |  16 +-
 tests/conventions/test_shoc_standard.py     |  89 +++++++---
 .../point_extraction/test_extract_points.py |   2 +-
 5 files changed, 232 insertions(+), 54 deletions(-)

diff --git a/src/emsarray/conventions/_base.py b/src/emsarray/conventions/_base.py
index 4820dc5..2bbc8a8 100644
--- a/src/emsarray/conventions/_base.py
+++ b/src/emsarray/conventions/_base.py
@@ -5,7 +5,7 @@
 import warnings
 from collections.abc import Callable, Hashable, Iterable, Sequence
 from functools import cached_property
-from typing import TYPE_CHECKING, Any, Generic, TypeVar, cast
+from typing import TYPE_CHECKING, Any, Generic, Literal, TypeVar, cast
 
 import numpy
 import xarray
@@ -17,7 +17,7 @@
 from emsarray import utils
 from emsarray.compat.shapely import SpatialIndex
 from emsarray.exceptions import NoSuchCoordinateError
-from emsarray.operations import depth
+from emsarray.operations import depth, point_extraction
 from emsarray.plot import (
     _requires_plot, animate_on_figure, make_plot_title, plot_on_figure,
     polygons_to_collection
@@ -516,7 +516,7 @@ def get_depth_coordinate_for_data_array(
         candidates = [
             coordinate
             for coordinate in self.depth_coordinates
-            if coordinate.dims[0] in data_array.dims
+            if set(coordinate.dims) <= set(data_array.dims)
         ]
         if len(candidates) == 0:
             raise NoSuchCoordinateError(f"No depth coordinate found for {name}")
@@ -1471,8 +1471,7 @@ def get_index_for_point(
                 polygon=self.polygons[linear_index])
         return None
 
-    @abc.abstractmethod
-    def selector_for_index(self, index: Index) -> dict[Hashable, int]:
+    def selector_for_index(self, index: Index) -> xarray.Dataset:
         """
         Convert a convention native index into a selector
         that can be passed to :meth:`Dataset.isel <xarray.Dataset.isel>`.
@@ -1494,11 +1493,24 @@ def selector_for_index(self, index: Index) -> dict[Hashable, int]:
         :meth:`.select_point`
         :ref:`indexing`
         """
+        index_dimension = utils.find_unused_dimension(self.dataset, 'index')
+        dataset = self.selector_for_indexes([index], index_dimension=index_dimension)
+        dataset = dataset.squeeze(dim=index_dimension, drop=False)
+        return dataset
+
+    @abc.abstractmethod
+    def selector_for_indexes(
+        self,
+        indexes: list[Index],
+        *,
+        index_dimension: Hashable | None = None,
+    ) -> xarray.Dataset:
         pass
 
     def select_index(
         self,
         index: Index,
+        drop_geometry: bool = True,
     ) -> xarray.Dataset:
         """
         Return a new dataset that contains values only from a single index.
@@ -1516,6 +1528,62 @@ def select_index(
         index : :data:`Index`
             The index to select.
             The index must be for the default grid kind for this dataset.
+        drop_geometry : bool, default True
+            Whether to drop geometry variables from the returned point dataset.
+            If the geometry data is kept,
+            the associated geometry data will no longer conform to the dataset convention
+            and may not conform to any sensible convention at all.
+            The format of the geometry data left after selecting points is convention-dependent.
+
+        Returns
+        -------
+        :class:`xarray.Dataset`
+            A new dataset that is subset to the one index.
+
+        Notes
+        -----
+
+        The returned dataset will most likely not have sufficient coordinate data
+        to be used with a particular :class:`Convention` any more.
+        The ``dataset.ems`` accessor will raise an error if accessed on the new dataset.
+        """
+        index_dimension = utils.find_unused_dimension(self.dataset, 'index')
+        dataset = self.select_indexes([index], index_dimension=index_dimension, drop_geometry=drop_geometry)
+        dataset = dataset.squeeze(dim=index_dimension, drop=False)
+        return dataset
+
+    def select_indexes(
+        self,
+        indexes: list[Index],
+        *,
+        index_dimension: Hashable | None = None,
+        drop_geometry: bool = True,
+    ) -> xarray.Dataset:
+        """
+        Return a new dataset that contains values only at the selected indexes.
+        This is much like doing a :meth:`xarray.Dataset.isel()` on some indexes,
+        but works with convention native index types.
+
+        An index is associated with a grid kind.
+        The returned dataset will only contain variables that were defined on this grid,
+        with the single indexed point selected.
+        For example, if the index of a face is passed in,
+        the returned dataset will not contain any variables defined on an edge.
+
+        Parameters
+        ----------
+        index : :data:`Index`
+            The index to select.
+            The index must be for the default grid kind for this dataset.
+        index_dimension : str, optional
+            The name of the new dimension added for each index to select.
+            Defaults to the :func:`first unused dimension <.utils.find_unused_dimension>` with prefix `index`.
+        drop_geometry : bool, default True
+            Whether to drop geometry variables from the returned point dataset.
+            If the geometry data is kept,
+            the associated geometry data will no longer conform to the dataset convention
+            and may not conform to any sensible convention at all.
+            The format of the geometry data left after selecting points is convention-dependent.
 
         Returns
         -------
@@ -1529,16 +1597,21 @@ def select_index(
         to be used with a particular :class:`Convention` any more.
         The ``dataset.ems`` accessor will raise an error if accessed on the new dataset.
         """
-        selector = self.selector_for_index(index)
+        selector = self.selector_for_indexes(indexes, index_dimension=index_dimension)
 
         # Make a new dataset consisting of only data arrays that use at least
         # one of these dimensions.
-        dims = set(selector.keys())
+        if drop_geometry:
+            dataset = self.drop_geometry()
+        else:
+            dataset = self.dataset
+
+        dims = set(selector.variables.keys())
         names = [
-            name for name, data_array in self.dataset.items()
+            name for name, data_array in dataset.items()
             if dims.intersection(data_array.dims)
         ]
-        dataset = utils.extract_vars(self.dataset, names)
+        dataset = utils.extract_vars(dataset, names)
 
-        # Select just this point
+        # Select just these indexes
         return dataset.isel(selector)
@@ -1565,6 +1638,38 @@ def select_point(self, point: Point) -> xarray.Dataset:
             raise ValueError("Point did not intersect dataset")
         return self.select_index(index.index)
 
+    def select_points(
+        self,
+        points: list[Point],
+        *,
+        point_dimension: Hashable | None = None,
+        missing_points: Literal['error', 'drop'] = 'error',
+    ) -> xarray.Dataset:
+        """
+        Extract values from all variables on the default grid at a sequence of points.
+
+        Parameters
+        ----------
+        points : list of shapely.Point
+            The points to extract.
+        point_dimension : str, optional
+            The name of the new dimension used to index points.
+            Defaults to 'point', or 'point_0', 'point_1', etc. if those dimensions already exist.
+        missing_points : {'error', 'drop'}, default 'error'
+            What to do if a point does not intersect the dataset.
+            'error' will raise an error, while 'drop' will drop those points.
+
+        Returns
+        -------
+        xarray.Dataset
+            A dataset with values extracted from the points.
+            No variables not defined on the default grid and no geometry variables will be present.
+        """
+        if point_dimension is None:
+            point_dimension = utils.find_unused_dimension(self.dataset, 'point')
+        return point_extraction.extract_points(
+            self.dataset, points, point_dimension=point_dimension, missing_points=missing_points)
+
     @abc.abstractmethod
     def get_all_geometry_names(self) -> list[Hashable]:
         """
@@ -1936,7 +2041,30 @@ def wind(
             dimensions=dimensions, sizes=sizes, linear_dimension=linear_dimension)
 
-    def selector_for_index(self, index: Index) -> dict[Hashable, int]:
-        grid_kind, indexes = self.unpack_index(index)
+    def selector_for_indexes(
+        self,
+        indexes: list[Index],
+        *,
+        index_dimension: Hashable | None = None,
+    ) -> xarray.Dataset:
+        if index_dimension is None:
+            index_dimension = utils.find_unused_dimension(self.dataset, 'index')
+        if len(indexes) == 0:
+            raise ValueError("Need at least one index to select")
+
+        grid_kinds, index_tuples = zip(*[self.unpack_index(index) for index in indexes])
+
+        unique_grid_kinds = set(grid_kinds)
+        if len(unique_grid_kinds) > 1:
+            raise ValueError(
+                "All indexes must be on the same grid kind, got "
+                + ", ".join(map(repr, unique_grid_kinds)))
+
+        grid_kind = grid_kinds[0]
         dimensions = self.grid_dimensions[grid_kind]
-        return dict(zip(dimensions, indexes))
+        # This array will have shape (len(indexes), len(dimensions))
+        index_array = numpy.array(index_tuples)
+        return xarray.Dataset({
+            dimension: (index_dimension, index_array[:, i])
+            for i, dimension in enumerate(dimensions)
+        })
diff --git a/src/emsarray/operations/point_extraction.py b/src/emsarray/operations/point_extraction.py
index f183f20..13b8ac9 100644
--- a/src/emsarray/operations/point_extraction.py
+++ b/src/emsarray/operations/point_extraction.py
@@ -23,8 +23,8 @@
 import xarray
 import xarray.core.dtypes as xrdtypes
 
+import emsarray.conventions
 from emsarray import utils
-from emsarray.conventions import Convention
 
 
 @dataclasses.dataclass
@@ -67,7 +67,7 @@ def extract_points(
     dataset: xarray.Dataset,
     points: list[shapely.Point],
     *,
-    point_dimension: Hashable = 'point',
+    point_dimension: Hashable | None = None,
     missing_points: Literal['error', 'drop'] = 'error',
 ) -> xarray.Dataset:
     """
@@ -106,7 +106,10 @@ def extract_points(
     --------
     :func:`extract_dataframe`
     """
-    convention: Convention = dataset.ems
+    convention: emsarray.conventions.Convention = dataset.ems
+
+    if point_dimension is None:
+        point_dimension = utils.find_unused_dimension(dataset, 'point')
 
     # Find the indexer for each given point
     indexes = numpy.array([convention.get_index_for_point(point) for point in points])
@@ -118,21 +121,17 @@ def extract_points(
         indexes=out_of_bounds,
         points=[points[i] for i in out_of_bounds])
 
-    # Make a DataFrame out of all point indexers
-    selector_df = pandas.DataFrame([
-        convention.selector_for_index(index.index)
-        for index in indexes
-        if index is not None])
-    point_indexes = [i for i, index in enumerate(indexes) if index is not None]
+    point_ds = convention.select_indexes(
+        [index.index for index in indexes if index is not None],
+        index_dimension=point_dimension,
+        drop_geometry=True)
 
-    # Subset the dataset to the points
-    point_ds = convention.drop_geometry()
-    selector_ds = _dataframe_to_dataset(selector_df, dimension_name=point_dimension)
-    point_ds = point_ds.isel(selector_ds)
+    # Number the points
+    point_indexes = [i for i, index in enumerate(indexes) if index is not None]
     point_ds = point_ds.assign_coords({
         point_dimension: ([point_dimension], point_indexes),
     })
 
     return point_ds
diff --git a/tests/conventions/test_base.py b/tests/conventions/test_base.py
index a259982..0aabd45 100644
--- a/tests/conventions/test_base.py
+++ b/tests/conventions/test_base.py
@@ -71,8 +71,20 @@ def wind_index(
     def ravel_index(self, indexes: SimpleGridIndex) -> int:
         return int(numpy.ravel_multi_index((indexes.y, indexes.x), self.shape))
 
-    def selector_for_index(self, index: SimpleGridIndex) -> dict[Hashable, int]:
-        return {'x': index.x, 'y': index.y}
+    def selector_for_indexes(
+        self,
+        indexes: list[SimpleGridIndex],
+        *,
+        index_dimension: Hashable | None = None,
+    ) -> xarray.Dataset:
+        if index_dimension is None:
+            index_dimension = utils.find_unused_dimension(self.dataset, 'index')
+        index_array = numpy.array([[index.x, index.y] for index in indexes])
+
+        return xarray.Dataset({
+            'x': (index_dimension, index_array[:, 0]),
+            'y': (index_dimension, index_array[:, 1]),
+        })
 
     def ravel(
         self,
diff --git a/tests/conventions/test_shoc_standard.py b/tests/conventions/test_shoc_standard.py
index 3bdaeed..cf944fd 100644
--- a/tests/conventions/test_shoc_standard.py
+++ b/tests/conventions/test_shoc_standard.py
@@ -11,8 +11,10 @@
 from shapely.geometry.polygon import Polygon, orient
 
 from emsarray.conventions import get_dataset_convention
-from emsarray.conventions.arakawa_c import c_mask_from_centres
-from emsarray.conventions.shoc import ArakawaCGridKind, ShocStandard
+from emsarray.conventions.arakawa_c import (
+    ArakawaCGridKind, ArakawaCIndex, c_mask_from_centres
+)
+from emsarray.conventions.shoc import ShocStandard
 from emsarray.operations import geometry
 from tests.utils import (
     DiagonalShocGrid, ShocGridGenerator, ShocLayerGenerator, mask_from_strings
@@ -346,13 +348,25 @@ def test_grid_kind_and_size():
 @pytest.mark.parametrize(
     ['index', 'selector'],
     (
-        [(ArakawaCGridKind.face, 3, 4), {'j_centre': 3, 'i_centre': 4}],
-        [(ArakawaCGridKind.left, 5, 6), {'j_left': 5, 'i_left': 6}],
-        [(ArakawaCGridKind.back, 7, 8), {'j_back': 7, 'i_back': 8}],
-        [(ArakawaCGridKind.node, 9, 10), {'j_node': 9, 'i_node': 10}],
+        [(ArakawaCGridKind.face, 3, 4), xarray.Dataset({
+            'j_centre': ((), 3),
+            'i_centre': ((), 4),
+        })],
+        [(ArakawaCGridKind.left, 5, 6), xarray.Dataset({
+            'j_left': ((), 5),
+            'i_left': ((), 6),
+        })],
+        [(ArakawaCGridKind.back, 7, 8), xarray.Dataset({
+            'j_back': ((), 7),
+            'i_back': ((), 8),
+        })],
+        [(ArakawaCGridKind.node, 9, 10), xarray.Dataset({
+            'j_node': ((), 9),
+            'i_node': ((), 10),
+        })],
     ),
 )
-def test_selector_for_index(index, selector):
+def test_selector_for_index(index: ArakawaCIndex, selector: xarray.Dataset):
     dataset = make_dataset(j_size=5, i_size=7)
     convention: ShocStandard = dataset.ems
-    assert selector == convention.selector_for_index(index)
+    assert selector.equals(convention.selector_for_index(index))
 
 
 # These select_index tests are not specifically about SHOC,
 # they are more about how select_index behaves with multiple grid kinds.
-def test_select_index_face(): +def test_select_index_face() -> None: dataset = make_dataset(time_size=4, k_size=5, j_size=5, i_size=9) convention: ShocStandard = dataset.ems face = convention.select_index((ArakawaCGridKind.face, 3, 4)) - assert set(face.data_vars.keys()) == { - # These are the data variables we expect to see + assert set(face.variables.keys()) == { + 'botz', 'eta', 'temp', + } + assert face.sizes == {'record': 4, 'k_centre': 5} + + +def test_select_index_face_keep_geometry() -> None: + dataset = make_dataset(time_size=4, k_size=5, j_size=5, i_size=9) + convention: ShocStandard = dataset.ems + face = convention.select_index((ArakawaCGridKind.face, 3, 4), drop_geometry=False) + + assert set(face.variables.keys()) == { 'botz', 'eta', 'temp', - # These coordinates variables are also included in data_vars - # because of how xarray handles multidimensional coordinates 'x_centre', 'y_centre', } assert face.sizes == {'record': 4, 'k_centre': 5} @@ -377,7 +399,7 @@ def test_select_index_face(): assert face['y_centre'].values == dataset['y_centre'].values[3, 4] -def test_select_index_edge(): +def test_select_index_edge() -> None: dataset = make_dataset(time_size=4, k_size=5, j_size=5, i_size=9) convention: ShocStandard = dataset.ems @@ -386,9 +408,6 @@ def test_select_index_edge(): # This is the only data variable we expect to see, # as it is the only one defined on left edges. 'u1', - # These coordinates variables are also included in data_vars - # because of how xarray handles multidimensional coordinates - 'x_left', 'y_left' } assert left.sizes == {'record': 4, 'k_centre': 5} @@ -397,25 +416,45 @@ def test_select_index_edge(): # This is the only data variable we expect to see, # as it is the only one defined on back edges. 'u2', - # These coordinates variables are also included in data_vars - # because of how xarray handles multidimensional coordinates - 'x_back', 'y_back' } assert back.sizes == {'record': 4, 'k_centre': 5} -def test_select_index_grid(): +def test_select_index_edge_keep_geometry() -> None: + dataset = make_dataset(time_size=4, k_size=5, j_size=5, i_size=9) + convention: ShocStandard = dataset.ems + + left = convention.select_index((ArakawaCGridKind.left, 3, 4), drop_geometry=False) + assert set(left.variables.keys()) == { + 'u1', 'x_left', 'y_left' + } + assert left.sizes == {'record': 4, 'k_centre': 5} + + back = convention.select_index((ArakawaCGridKind.back, 3, 4), drop_geometry=False) + assert set(back.variables.keys()) == { + 'u2', 'x_back', 'y_back' + } + assert back.sizes == {'record': 4, 'k_centre': 5} + + +def test_select_index_grid() -> None: dataset = make_dataset(time_size=4, k_size=5, j_size=5, i_size=9) convention: ShocStandard = dataset.ems node = convention.select_index((ArakawaCGridKind.node, 3, 4)) assert set(node.data_vars.keys()) == { - # This is the only data variable we expect to see, - # as it is the only one defined on the node. 
         'flag',
-        # These coordinates variables are also included in data_vars
-        # because of how xarray handles multidimensional coordinates
-        'x_grid', 'y_grid'
+    }
+    assert node.sizes == {'record': 4, 'k_centre': 5}
+
+
+def test_select_index_grid_keep_geometry() -> None:
+    dataset = make_dataset(time_size=4, k_size=5, j_size=5, i_size=9)
+    convention: ShocStandard = dataset.ems
+
+    node = convention.select_index((ArakawaCGridKind.node, 3, 4), drop_geometry=False)
+    assert set(node.data_vars.keys()) == {
+        'flag', 'x_grid', 'y_grid'
     }
     assert node.sizes == {'record': 4, 'k_centre': 5}
diff --git a/tests/operations/point_extraction/test_extract_points.py b/tests/operations/point_extraction/test_extract_points.py
index 2c0973e..6f979cf 100644
--- a/tests/operations/point_extraction/test_extract_points.py
+++ b/tests/operations/point_extraction/test_extract_points.py
@@ -24,7 +24,7 @@ def test_extract_points(
     assert 'point' in point_dataset.dims
     assert point_dataset.sizes['point'] == num_points
 
-    assert point_dataset.variables.keys() == {'values', 'point'}
+    assert set(point_dataset.variables.keys()) == {'values', 'point'}
 
     values = point_dataset.data_vars['values']
     assert values.dims == ('point',)

From 081c313bd99ed8b83cc29d0dc01974bb13ad8aab Mon Sep 17 00:00:00 2001
From: Tim Heap
Date: Wed, 10 Jul 2024 13:54:20 +1000
Subject: [PATCH 4/5] Add release note

---
 docs/releases/development.rst | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/docs/releases/development.rst b/docs/releases/development.rst
index bf4e943..ea3dd5d 100644
--- a/docs/releases/development.rst
+++ b/docs/releases/development.rst
@@ -35,3 +35,19 @@ Next release (in development)
   (:pr:`142`).
 * Add :attr:`.Convention.depth_coordinates` and :meth:`.Convention.get_depth_coordinate_for_data_array()`. Deprecate functions :meth:`.Convention.get_depth_name()`, :meth:`.Convention.get_all_depth_names()`, and :meth:`Convention.get_time_name()`. Remove deprecated functions ``Convention.get_depths()`` and ``Convention.get_times()`` (:pr:`143`).
 * Swap to using `pyproject.toml` for all project metadata (:pr:`145`).
+* Add new methods
+  :meth:`.Convention.selector_for_indexes()`,
+  :meth:`.Convention.select_indexes()`, and
+  :meth:`.Convention.select_points()`.
+  These allow for more efficient extraction of multiple points at the same time.
+  The return type of :meth:`.Convention.selector_for_index()` has been changed
+  from a `dict` to an :class:`xarray.Dataset`,
+  but this new value is also designed to be passed directly to :meth:`Dataset.isel() <xarray.Dataset.isel>`.
+  :meth:`.Convention.select_index()` and :meth:`.Convention.select_indexes()`
+  have a new `drop_geometry` flag which defaults to True.
+  Previously these methods would act as if `drop_geometry` were False,
+  but this led to convention-dependent results as to which geometry variables were returned.
+  The fragmented geometry variables from different conventions often did not contain enough data to be useful.
+  By dropping geometry the results are more consistent across all conventions
+  and do not contain potentially fragmented geometry information.
+  (:issue:`106`, :pr:`146`).

From b3638264580879d2b851fb78b1021eab039bb528 Mon Sep 17 00:00:00 2001
From: Tim Heap
Date: Wed, 10 Jul 2024 16:26:18 +1000
Subject: [PATCH 5/5] fixup! Add `Convention.select_indexes()` and related functions
---
 src/emsarray/conventions/_base.py | 76 +++++++++++++++++++++++++++----
 1 file changed, 68 insertions(+), 8 deletions(-)

diff --git a/src/emsarray/conventions/_base.py b/src/emsarray/conventions/_base.py
index 2bbc8a8..287fddb 100644
--- a/src/emsarray/conventions/_base.py
+++ b/src/emsarray/conventions/_base.py
@@ -1483,15 +1483,25 @@ def selector_for_index(self, index: Index) -> xarray.Dataset:
 
         Returns
         -------
-        selector
-            A dict suitable for passing to :meth:`xarray.Dataset.isel`
+        selector : xarray.Dataset
+            A selector suitable for passing to :meth:`xarray.Dataset.isel`
             that will select values at this index.
 
         See Also
        --------
         :meth:`.select_index`
         :meth:`.select_point`
+        :meth:`.selector_for_indexes`
         :ref:`indexing`
+
+        Notes
+        -----
+
+        The returned selector is an :class:`xarray.Dataset`,
+        but the contents of the dataset are dependent on the specific convention
+        and may change between versions of emsarray.
+        The selector should be treated as an opaque value
+        that is only useful when passed to :meth:`xarray.Dataset.isel()`.
         """
         index_dimension = utils.find_unused_dimension(self.dataset, 'index')
         dataset = self.selector_for_indexes([index], index_dimension=index_dimension)
@@ -1505,6 +1515,37 @@ def selector_for_indexes(
         *,
         index_dimension: Hashable | None = None,
     ) -> xarray.Dataset:
+        """
+        Convert a list of convention native indexes into a selector
+        that can be passed to :meth:`Dataset.isel <xarray.Dataset.isel>`.
+
+        Parameters
+        ----------
+        indexes : list of :data:`Index`
+            A list of convention native indexes.
+
+        Returns
+        -------
+        selector : xarray.Dataset
+            A selector suitable for passing to :meth:`xarray.Dataset.isel`
+            that will select values at these indexes.
+
+        See Also
+        --------
+        :meth:`.select_indexes`
+        :meth:`.select_points`
+        :meth:`.selector_for_index`
+        :ref:`indexing`
+
+        Notes
+        -----
+
+        The returned selector is an :class:`xarray.Dataset`,
+        but the contents of the dataset are dependent on the specific convention
+        and may change between versions of emsarray.
+        The selector should be treated as an opaque value
+        that is only useful when passed to :meth:`xarray.Dataset.isel()`.
+        """
         pass
 
     def select_index(
@@ -1527,7 +1568,6 @@ def select_index(
         ----------
         index : :data:`Index`
             The index to select.
-            The index must be for the default grid kind for this dataset.
         drop_geometry : bool, default True
             Whether to drop geometry variables from the returned point dataset.
             If the geometry data is kept,
@@ -1540,6 +1580,11 @@ def select_index(
         :class:`xarray.Dataset`
             A new dataset that is subset to the one index.
 
+        See Also
+        --------
+        :meth:`.select_point`
+        :meth:`.select_indexes`
+
         Notes
         -----
 
@@ -1566,15 +1611,15 @@ def select_indexes(
 
         An index is associated with a grid kind.
         The returned dataset will only contain variables that were defined on this grid,
-        with the single indexed point selected.
+        with the indexed points selected.
         For example, if the index of a face is passed in,
         the returned dataset will not contain any variables defined on an edge.
 
         Parameters
         ----------
-        index : :data:`Index`
-            The index to select.
-            The index must be for the default grid kind for this dataset.
+        indexes : list of :data:`Index`
+            The indexes to select.
+            The indexes must all be for the same grid kind.
         index_dimension : str, optional
             The name of the new dimension added for each index to select.
             Defaults to the :func:`first unused dimension <.utils.find_unused_dimension>` with prefix `index`.
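The "opaque value" note above is the crux of the new contract: callers
should not rely on the variables inside the selector, only on passing it
back to `isel()`. A minimal sketch of the intended call pattern, assuming
a SHOC standard dataset at a hypothetical path:

    import emsarray
    from emsarray.conventions.arakawa_c import ArakawaCGridKind

    dataset = emsarray.open_dataset("shoc_standard.nc")

    # Build a selector for two face cells and hand it straight to isel().
    # Inspecting the selector's contents is not part of the public API.
    selector = dataset.ems.selector_for_indexes(
        [(ArakawaCGridKind.face, 3, 4), (ArakawaCGridKind.face, 4, 4)])
    subset = dataset.isel(selector)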
@@ -1588,7 +1633,12 @@
         Returns
         -------
         :class:`xarray.Dataset`
-            A new dataset that is subset to the one index.
+            A new dataset that is subset to the indexes.
+
+        See Also
+        --------
+        :meth:`.select_points`
+        :meth:`.select_index`
 
         Notes
         -----
@@ -1632,6 +1682,11 @@
         -------
         xarray.Dataset
             A dataset of values at the point
+
+        See Also
+        --------
+        :meth:`.select_index`
+        :meth:`.select_points`
         """
         index = self.get_index_for_point(point)
         if index is None:
             raise ValueError("Point did not intersect dataset")
         return self.select_index(index.index)
@@ -1664,6 +1719,11 @@
         xarray.Dataset
             A dataset with values extracted from the points.
             No variables not defined on the default grid and no geometry variables will be present.
+
+        See Also
+        --------
+        :meth:`.select_indexes`
+        :meth:`.select_point`
         """
         if point_dimension is None:
             point_dimension = utils.find_unused_dimension(self.dataset, 'point')
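With the series applied, point extraction reduces to a single call on the
convention accessor. An end-to-end sketch (the file path and coordinates
below are hypothetical):

    import emsarray
    from shapely.geometry import Point

    dataset = emsarray.open_dataset("cfgrid1d.nc")

    # Extract every default-grid variable at each point. Points that miss
    # the grid raise NonIntersectingPoints unless missing_points='drop'.
    points = [Point(147.5, -19.1), Point(148.0, -19.4)]
    point_ds = dataset.ems.select_points(points, missing_points="drop")

    # The new dimension defaults to 'point', or 'point_0', 'point_1', ...
    # if that name is already taken, via utils.find_unused_dimension().
    print(point_ds.sizes["point"])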