From b15de6f86207be6f1ca848ed7579cf4e0411973a Mon Sep 17 00:00:00 2001
From: TomNicholas
Date: Wed, 29 May 2024 20:13:50 -0400
Subject: [PATCH 1/2] sketch of .inherit method

---
Review notes (not part of the commit message):
 * added the missing comma after the `explicit_coords=...` argument of the
   merge_core call (the original hunk was a SyntaxError)
 * take `.variables` from the merge_core result and pass it to dict.update
   as a mapping; the result is a named tuple, so `**result` cannot work
 * return `self.assign(...)` (copy + update, returns the new tree) instead of
   `DataTree.copy().update(...)`, which called copy() on the class and
   returned the None result of the in-place update()
 * `all_inherited_variables` is annotated as a dict because it is mutated
 * blob ids on the `index` lines and the commit ids are those of the
   original series and no longer match the edited content

 xarray/core/datatree.py | 46 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)

diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py
index 5737cdcb686..967787cb11e 100644
--- a/xarray/core/datatree.py
+++ b/xarray/core/datatree.py
@@ -34,7 +34,7 @@
     datatree_repr as datatree_repr_html,
 )
 from xarray.core.indexes import Index, Indexes
-from xarray.core.merge import dataset_update_method
+from xarray.core.merge import dataset_update_method, merge_core
 from xarray.core.options import OPTIONS as XR_OPTS
 from xarray.core.treenode import NamedNode, NodePath, Tree
 from xarray.core.utils import (
@@ -967,6 +967,50 @@ def update(
             inplace=True, children=merged_children, **vars_merge_result._asdict()
         )
 
+    @property
+    def inherit(self) -> DataTree:
+        """
+        Returns a copy of this node additionally containing all coordinates that can be inherited from parent nodes.
+        
+        Inspired by the CF conventions' "search by proximity" [1]_.
+
+        References
+        ----------
+        .. [1] https://cfconventions.org/Data/cf-conventions/cf-conventions-1.9/cf-conventions.html#_search_by_proximity
+        """
+
+        def find_compatible_variables(
+            candidate_variables: Mapping[str, Variable],
+            existing_variables: Mapping[str, Variable],
+        ) -> Mapping[str, Variable]:
+            """Check variables for compatibility as inherited variables."""
+
+            # To be compatible, candidate variables must be both new and alignable.
+            # This should drop candidate variables which are duplicated or not mergeable.
+            return merge_core(
+                [existing_variables, candidate_variables],
+                priority_arg=1,
+                explicit_coords=list(candidate_variables.keys()),
+                combine_attrs="override",
+            ).variables
+        
+        # TODO this will call merge for every parent up to the root. Is there an alternative design which only calls merge once?
+
+        # TODO use _MergeResult instead?
+
+        local_variables = self._variables
+        all_inherited_variables: dict[str, Variable] = {}
+        for parent in self.parents:
+            # Update inherited_variables mapping at each node when new variables are encountered
+            inheritable_variables = find_compatible_variables(
+                parent.coords, local_variables | all_inherited_variables
+            )
+
+            # TODO should we be keeping track of which variables were coordinates?
+            all_inherited_variables.update(inheritable_variables)
+
+        return self.assign(all_inherited_variables)
+
     def assign(
         self, items: Mapping[Any, Any] | None = None, **items_kwargs: Any
     ) -> DataTree:

From fe207b50e512c962d58bc2b381bbea0c56b071c1 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 4 Jun 2024 05:53:46 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
Review note (not part of the commit message): the context lines of the
second hunk were updated to match the corrected merge_core call in patch 1/2.

 xarray/core/datatree.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py
index 967787cb11e..c38a548fac4 100644
--- a/xarray/core/datatree.py
+++ b/xarray/core/datatree.py
@@ -971,7 +971,7 @@ def update(
     def inherit(self) -> DataTree:
         """
         Returns a copy of this node additionally containing all coordinates that can be inherited from parent nodes.
-        
+
         Inspired by the CF conventions' "search by proximity" [1]_.
 
         References
@@ -993,7 +993,7 @@ def find_compatible_variables(
                 explicit_coords=list(candidate_variables.keys()),
                 combine_attrs="override",
             ).variables
-        
+
         # TODO this will call merge for every parent up to the root. Is there an alternative design which only calls merge once?
 
         # TODO use _MergeResult instead?