Merge branch 'master' into master

ecmwf · Jul 19, 2024 · 2d06f22 · 2d06f22
2 parents 6f5e9ed + c9d7ef9
commit 2d06f22
Show file tree

Hide file tree

Showing 18 changed files with 221 additions and 26 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -2,6 +2,18 @@
 Changelog for cfgrib
 ====================
 
+0.9.13.0 (2024-06-27)
+---------------------
+
+- Allow users to pass a list of values to filter a key by.
+  See `#384 <https://github.com/ecmwf/cfgrib/pull/384>`_.
+
+- Functionality to ignore keys when reading a grib file
+  See `#382 <https://github.com/ecmwf/cfgrib/pull/382>`_.
+
+- Preserve coordinate encoding in cfgrib.open_datasets
+  See `#381 <https://github.com/ecmwf/cfgrib/pull/381>`_.
+
 0.9.12.0 (2024-05-26)
 ---------------------
 

diff --git a/cfgrib/__init__.py b/cfgrib/__init__.py
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "0.9.12.0"
+__version__ = "0.9.13.0"
 
 # cfgrib core API depends on the ECMWF ecCodes C-library only
 from .abc import Field, Fieldset, Index, MappingFieldset

diff --git a/cfgrib/cfmessage.py b/cfgrib/cfmessage.py
@@ -171,6 +171,10 @@ def build_valid_time(time, step):
         functools.partial(from_grib_date_time, date_key="indexingDate", time_key="indexingTime"),
         functools.partial(to_grib_date_time, date_key="indexingDate", time_key="indexingTime"),
     ),
+    "valid_month": (
+        functools.partial(from_grib_date_time, date_key="monthlyVerificationDate", time_key="validityTime"),
+        functools.partial(to_grib_date_time, date_key="monthlyVerificationDate", time_key="validityTime"),
+    ),
 }  # type: messages.ComputedKeysType
 
 

diff --git a/cfgrib/dataset.py b/cfgrib/dataset.py
@@ -161,6 +161,7 @@
     "verifying_time",
     "forecastMonth",
     "indexing_time",
+    "valid_month",
 ]
 SPECTRA_KEYS = ["directionNumber", "frequencyNumber"]
 
@@ -251,6 +252,12 @@
         "standard_name": "time",
         "long_name": "time",
     },
+    "valid_month": {
+        "units": "seconds since 1970-01-01T00:00:00",
+        "calendar": "proleptic_gregorian",
+        "standard_name": "time",
+        "long_name": "time",
+    },
     "verifying_time": {
         "units": "seconds since 1970-01-01T00:00:00",
         "calendar": "proleptic_gregorian",
@@ -333,9 +340,9 @@ def get_values_in_order(message, shape):
 class OnDiskArray:
     index: abc.Index[T.Any, abc.Field]
     shape: T.Tuple[int, ...]
-    field_id_index: T.Dict[
-        T.Tuple[T.Any, ...], T.List[T.Union[int, T.Tuple[int, int]]]
-    ] = attr.attrib(repr=False)
+    field_id_index: T.Dict[T.Tuple[T.Any, ...], T.List[T.Union[int, T.Tuple[int, int]]]] = (
+        attr.attrib(repr=False)
+    )
     missing_value: float
     geo_ndim: int = attr.attrib(default=1, repr=False)
     dtype = np.dtype("float32")
@@ -458,10 +465,7 @@ def encode_cf_first(data_var_attrs, encode_cf=("parameter", "time"), time_dims=(
         if "GRIB_units" in data_var_attrs:
             data_var_attrs["units"] = data_var_attrs["GRIB_units"]
     if "time" in encode_cf:
-        if set(time_dims).issubset(ALL_REF_TIME_KEYS):
-            coords_map.extend(time_dims)
-        else:
-            raise ValueError("time_dims %r not a subset of %r" % (time_dims, ALL_REF_TIME_KEYS))
+        coords_map.extend(time_dims)
     else:
         coords_map.extend(DATA_TIME_KEYS)
     coords_map.extend(VERTICAL_KEYS)
@@ -491,6 +495,7 @@ def build_variable_components(
     read_keys: T.Iterable[str] = (),
     time_dims: T.Sequence[str] = ("time", "step"),
     extra_coords: T.Dict[str, str] = {},
+    coords_as_attributes: T.Dict[str, str] = {},
     cache_geo_coords: bool = True,
 ) -> T.Tuple[T.Dict[str, int], Variable, T.Dict[str, Variable]]:
     data_var_attrs = enforce_unique_attributes(index, DATA_ATTRIBUTES_KEYS, filter_by_keys)
@@ -499,8 +504,9 @@ def build_variable_components(
     first = index.first()
     extra_attrs = read_data_var_attrs(first, extra_keys)
     data_var_attrs.update(**extra_attrs)
-    coords_map = encode_cf_first(data_var_attrs, encode_cf, time_dims)
-
+    coords_map = encode_cf_first(
+        data_var_attrs, encode_cf, time_dims,
+    )
     coord_name_key_map = {}
     coord_vars = {}
     for coord_key in coords_map:
@@ -516,6 +522,9 @@ def build_variable_components(
             and "GRIB_typeOfLevel" in data_var_attrs
         ):
             coord_name = data_var_attrs["GRIB_typeOfLevel"]
+        if coord_name in coords_as_attributes and len(values) == 1:
+            data_var_attrs[f"GRIB_{coord_name}"] = values
+            continue
         coord_name_key_map[coord_name] = coord_key
         attributes = {
             "long_name": "original GRIB coordinate for key: %s(%s)" % (orig_name, coord_name),
@@ -662,12 +671,21 @@ def build_dataset_components(
     read_keys: T.Iterable[str] = (),
     time_dims: T.Sequence[str] = ("time", "step"),
     extra_coords: T.Dict[str, str] = {},
+    coords_as_attributes: T.Dict[str, str] = {},
     cache_geo_coords: bool = True,
 ) -> T.Tuple[T.Dict[str, int], T.Dict[str, Variable], T.Dict[str, T.Any], T.Dict[str, T.Any]]:
     dimensions = {}  # type: T.Dict[str, int]
     variables = {}  # type: T.Dict[str, Variable]
     filter_by_keys = index.filter_by_keys
 
+    # Warn about time_dims here to prevent repeated messages in build_variable_components
+    if errors != "ignore" and not set(time_dims).issubset(ALL_REF_TIME_KEYS):
+        log.warning(
+            "Not all time_dimensions are recognised, those which are not in the following list will not "
+            " be decoded as datetime objects:\n"
+            f"{ALL_REF_TIME_KEYS}"
+        )
+
     for param_id in index.get("paramId", []):
         var_index = index.subindex(paramId=param_id)
         try:
@@ -680,6 +698,7 @@ def build_dataset_components(
                 read_keys=read_keys,
                 time_dims=time_dims,
                 extra_coords=extra_coords,
+                coords_as_attributes=coords_as_attributes,
                 cache_geo_coords=cache_geo_coords,
             )
         except DatasetBuildError as ex:
@@ -752,6 +771,7 @@ def open_fieldset(
     indexpath: T.Optional[str] = None,
     filter_by_keys: T.Dict[str, T.Any] = {},
     read_keys: T.Sequence[str] = (),
+    ignore_keys: T.Sequence[str] = [],
     time_dims: T.Sequence[str] = ("time", "step"),
     extra_coords: T.Dict[str, str] = {},
     computed_keys: messages.ComputedKeysType = cfmessage.COMPUTED_KEYS,
@@ -763,6 +783,7 @@ def open_fieldset(
         log.warning(f"indexpath value {indexpath} is ignored")
 
     index_keys = compute_index_keys(time_dims, extra_coords, filter_by_keys)
+    index_keys = [key for key in index_keys if key not in ignore_keys]
     index = messages.FieldsetIndex.from_fieldset(fieldset, index_keys, computed_keys)
     filtered_index = index.subindex(filter_by_keys)
     return open_from_index(filtered_index, read_keys, time_dims, extra_coords, **kwargs)
@@ -772,10 +793,12 @@ def open_fileindex(
     stream: messages.FileStream,
     indexpath: str = messages.DEFAULT_INDEXPATH,
     index_keys: T.Sequence[str] = INDEX_KEYS + ["time", "step"],
+    ignore_keys: T.Sequence[str] = [],
     filter_by_keys: T.Dict[str, T.Any] = {},
     computed_keys: messages.ComputedKeysType = cfmessage.COMPUTED_KEYS,
 ) -> messages.FileIndex:
     index_keys = sorted(set(index_keys) | set(filter_by_keys))
+    index_keys = [key for key in index_keys if key not in ignore_keys]
     index = messages.FileIndex.from_indexpath_or_filestream(
         stream, index_keys, indexpath=indexpath, computed_keys=computed_keys
     )
@@ -790,12 +813,12 @@ def open_file(
     read_keys: T.Sequence[str] = (),
     time_dims: T.Sequence[str] = ("time", "step"),
     extra_coords: T.Dict[str, str] = {},
+    ignore_keys: T.Sequence[str] = [],
     **kwargs: T.Any,
 ) -> Dataset:
     """Open a GRIB file as a ``cfgrib.Dataset``."""
     path = os.fspath(path)
     stream = messages.FileStream(path, errors=errors)
     index_keys = compute_index_keys(time_dims, extra_coords)
-    index = open_fileindex(stream, indexpath, index_keys, filter_by_keys=filter_by_keys)
-
+    index = open_fileindex(stream, indexpath, index_keys, ignore_keys=ignore_keys, filter_by_keys=filter_by_keys)
     return open_from_index(index, read_keys, time_dims, extra_coords, errors=errors, **kwargs)
diff --git a/cfgrib/messages.py b/cfgrib/messages.py
@@ -468,7 +468,10 @@ def subindex(self, filter_by_keys={}, **query):
         field_ids_index = []
         for header_values, field_ids_values in self.field_ids_index:
             for idx, val in raw_query:
-                if header_values[idx] != val:
+                # Ensure that the value to be tested is a list or tuple
+                if not isinstance(val, (list, tuple)):
+                    val = [val]
+                if header_values[idx] not in val:
                     break
             else:
                 field_ids_index.append((header_values, field_ids_values))

diff --git a/cfgrib/xarray_plugin.py b/cfgrib/xarray_plugin.py
@@ -99,24 +99,28 @@ def open_dataset(
         indexpath: str = messages.DEFAULT_INDEXPATH,
         filter_by_keys: T.Dict[str, T.Any] = {},
         read_keys: T.Iterable[str] = (),
+        ignore_keys: T.Iterable[str] = (),
         encode_cf: T.Sequence[str] = ("parameter", "time", "geography", "vertical"),
         squeeze: bool = True,
         time_dims: T.Iterable[str] = ("time", "step"),
         errors: str = "warn",
         extra_coords: T.Dict[str, str] = {},
+        coords_as_attributes: T.Dict[str, str] = {},
         cache_geo_coords: bool = True,
     ) -> xr.Dataset:
         store = CfGribDataStore(
             filename_or_obj,
             indexpath=indexpath,
             filter_by_keys=filter_by_keys,
             read_keys=read_keys,
+            ignore_keys=ignore_keys,
             encode_cf=encode_cf,
             squeeze=squeeze,
             time_dims=time_dims,
             lock=lock,
             errors=errors,
             extra_coords=extra_coords,
+            coords_as_attributes=coords_as_attributes,
             cache_geo_coords=cache_geo_coords,
         )
         with xr.core.utils.close_on_error(store):

diff --git a/ci/requirements-docs.txt b/ci/requirements-docs.txt
@@ -7,25 +7,25 @@
 alabaster==0.7.12         # via sphinx
 attrs==19.3.0
 babel==2.9.1              # via sphinx
-certifi==2023.7.22       # via requests
+certifi==2024.07.04       # via requests
 cffi==1.14.0
 chardet==3.0.4            # via requests
 click==7.1.2
 docutils==0.16            # via sphinx
-idna==2.9                 # via requests
+idna==3.7                 # via requests
 imagesize==1.2.0          # via sphinx
-jinja2==2.11.3            # via sphinx
+jinja2==3.1.4             # via sphinx
 markupsafe==1.1.1         # via jinja2
 numpy==1.22.0
 packaging==20.3           # via sphinx
 pandas==1.0.3             # via xarray
 pycparser==2.20           # via cffi
-pygments==2.7.4           # via sphinx
+pygments==2.15.0          # via sphinx
 pyparsing==2.4.7          # via packaging
 pytest-runner==5.2
 python-dateutil==2.8.1    # via pandas
 pytz==2020.1              # via babel, pandas
-requests==2.31.0          # via sphinx
+requests==2.32.0          # via sphinx
 six==1.14.0               # via packaging, python-dateutil
 snowballstemmer==2.0.0    # via sphinx
 sphinx==3.0.3
@@ -35,5 +35,5 @@ sphinxcontrib-htmlhelp==1.0.3  # via sphinx
 sphinxcontrib-jsmath==1.0.1  # via sphinx
 sphinxcontrib-qthelp==1.0.3  # via sphinx
 sphinxcontrib-serializinghtml==1.1.4  # via sphinx
-urllib3==1.26.5           # via requests
+urllib3==1.26.19           # via requests
 xarray==0.15.1
diff --git a/ci/requirements-tests.txt b/ci/requirements-tests.txt
@@ -19,7 +19,6 @@ packaging==20.3           # via pytest
 pandas==1.0.3             # via xarray
 pep8==1.7.1               # via pytest-pep8
 pluggy==0.13.1            # via pytest
-py==1.10.0                # via pytest
 pycparser==2.20           # via cffi
 pyflakes==2.2.0           # via pytest-flakes
 pyparsing==2.4.7          # via packaging
@@ -29,12 +28,12 @@ pytest-flakes==4.0.0
 pytest-mccabe==1.0
 pytest-pep8==1.0.6
 pytest-runner==5.2
-pytest==5.4.2
+pytest==7.2.0
 python-dateutil==2.8.1    # via pandas
 pytz==2020.1              # via pandas
 scipy==1.8.0
 six==1.14.0               # via packaging, python-dateutil
 toolz==0.10.0             # via dask
 wcwidth==0.1.9            # via pytest
 xarray==0.15.1
-zipp==3.1.0               # via importlib-metadata
+zipp==3.19.1              # via importlib-metadata
diff --git a/tests/environment-macos-3.8.yml b/tests/environment-macos-3.8.yml
@@ -90,7 +90,7 @@ dependencies:
   - python_abi=3.8
   - pytz=2023.3
   - readline=8.2
-  - requests=2.31.0
+  - requests=2.32.0
   - scipy=1.10.1
   - setuptools=68.0.0
   - six=1.16.0

diff --git a/tests/environment-ubuntu-3.7.yml b/tests/environment-ubuntu-3.7.yml
@@ -63,7 +63,7 @@ dependencies:
   - pip=21.2.2
   - pluggy=1.0.0
   - psutil=5.8.0
-  - py=1.10.0
+  - py=1.11.0
   - pycparser=2.21
   - pyparsing=3.0.4
   - pytest=6.2.5

diff --git a/tests/environment-ubuntu-3.8.yml b/tests/environment-ubuntu-3.8.yml
@@ -107,7 +107,7 @@ dependencies:
   - python_abi=3.8
   - pytz=2023.3
   - readline=8.2
-  - requests=2.31.0
+  - requests=2.32.0
   - s2n=1.3.48
   - scipy=1.10.1
   - setuptools=68.0.0

diff --git a/tests/environment-windows-3.8.yml b/tests/environment-windows-3.8.yml
@@ -48,7 +48,7 @@ dependencies:
   - pip=21.2.2
   - pluggy=0.13.1
   - psutil=5.8.0
-  - py=1.10.0
+  - py=1.11.0
   - pycparser=2.21
   - pyparsing=3.0.4
   - pytest=6.2.4

diff --git a/tests/sample-data/cams-egg4-monthly.grib b/tests/sample-data/cams-egg4-monthly.grib
diff --git a/tests/sample-data/era5-levels-members.nc b/tests/sample-data/era5-levels-members.nc
diff --git a/tests/sample-data/soil-surface-level-mix.grib b/tests/sample-data/soil-surface-level-mix.grib