Skip to content

Commit

Permalink
#525: added function to export with xarray, some cleaning, and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
mortenwh authored and akorosov committed Jan 5, 2023
1 parent 0f9783b commit 3ca223e
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 9 deletions.
74 changes: 73 additions & 1 deletion nansat/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import tempfile
import datetime
import warnings
import importlib

from nansat.utils import gdal
import numpy as np
Expand All @@ -30,14 +31,85 @@

from nansat.exceptions import NansatGDALError

try:
import xarray as xr
except:
warnings.warn("'xarray' needs to be installed for Exporter.xr_export to work.")


class Exporter(object):
"""Abstract class for export functions """
DEFAULT_INSTITUTE = 'NERSC'
DEFAULT_SOURCE = 'satellite remote sensing'

UNWANTED_METADATA = ['dataType', 'SourceFilename', 'SourceBand', '_Unsigned', 'FillValue',
'time', '_FillValue', 'type', 'scale', 'offset']
'_FillValue', 'type', 'scale', 'offset', 'NETCDF_VARNAME']

def xr_export(self, filename, bands=None, encoding=None):
"""Export Nansat object into netCDF using xarray. Longitude
and latitude arrays will be used as coordinates (of the 2D
dataset). Note that ACDD and CF metadata should be added to
the Nansat object before exporting. This method only removes
the UNWANTED_METADATA and creates the NetCDF file with the
metadata provided in the Nansat object.
Parameters
----------
filename : str
output file name
bands: list (default=None)
Specify band numbers to export.
If None, all bands are exported.
encoding: dict
Nested dictionary with variable names as keys and
dictionaries of variable specific encodings as values,
e.g., {"my_variable": {
"dtype": "int16",
"scale_factor": 0.1,
"zlib": True,
"_FillValue": 9999,
}, ...}
The h5netcdf engine supports both the NetCDF4-style
compression encoding parameters
{"zlib": True, "complevel": 9} and the h5py ones
{"compression": "gzip", "compression_opts": 9}. This
allows using any compression plugin installed in the HDF5
library, e.g. LZF.
"""
xr_installed = importlib.util.find_spec("xarray")
if xr_installed is None:
raise ModuleNotFoundError("Please install 'xarray'")
datavars = {}
lon, lat = self.get_geolocation_grids()
for band in self.bands().keys():
band_metadata = self.get_metadata(band_id=band)
# Clean up the use metadata
pop_keys = []
for key in band_metadata.keys():
if key in self.UNWANTED_METADATA:
pop_keys.append(key)
for key in pop_keys:
band_metadata.pop(key)
if bands is not None:
if (not band in bands) and (not band_metadata["name"] in bands):
continue
datavars[band_metadata["name"]] = xr.DataArray(
self[band],
dims = ["x", "y"],
attrs = band_metadata)
ds = xr.Dataset(
datavars,
coords = {"longitude": (["x", "y"], lon), "latitude": (["x", "y"], lat)},
attrs = self.get_metadata())
if encoding is None:
# Having a coordinate variable with a fill value applied
# violates the cf standard
encoding = {
"longitude": {"_FillValue": None},
"latitude": {"_FillValue": None}
}
ds.to_netcdf(filename, encoding=encoding)

def export(self, filename='', bands=None, rm_metadata=None, add_geolocation=True,
driver='netCDF', options=None, hardcopy=False):
Expand Down
1 change: 1 addition & 0 deletions nansat/mappers/mapper_netcdf_cf.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ def get_band_number():
subds = gdal.Open(fn)
band = subds.GetRasterBand(Context.band_number)
band_metadata = self._clean_band_metadata(band)
band_metadata['_FillValue'] = band.GetNoDataValue()

return self._band_dict(fn, Context.band_number, subds, band=band,
band_metadata=band_metadata)
Expand Down
7 changes: 0 additions & 7 deletions nansat/nansat.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,6 @@ class Nansat(Domain, Exporter):
"""

FILL_VALUE = 9.96921e+36
ALT_FILL_VALUE = -10000.

# instance attributes
logger = None
filename = None
Expand Down Expand Up @@ -265,10 +262,6 @@ def _fill_with_nan(self, band, band_data):
"""Fill input array with fill value taen from input band metadata"""
fill_value = float(band.GetMetadata()['_FillValue'])
band_data[band_data == fill_value] = np.nan
# quick hack to avoid problem with wrong _FillValue - see issue
# #123
if fill_value == self.FILL_VALUE:
band_data[band_data == self.ALT_FILL_VALUE] = np.nan

return band_data

Expand Down
55 changes: 54 additions & 1 deletion nansat/tests/test_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@

from netCDF4 import Dataset

from nansat import Nansat, Domain, NSR
from nansat import Nansat, Domain, NSR, exporter
from nansat.utils import gdal
from nansat.tests.nansat_test_base import NansatTestBase

Expand All @@ -46,6 +46,59 @@

class ExporterTest(NansatTestBase):

@patch('nansat.exporter.importlib.util.find_spec')
def test_xr_export__raises_module_not_found_error(self, mock_find_spec):
"""Test that an error is raised if xarray is not installed."""
mock_find_spec.return_value = None
n = Nansat(self.test_file_arctic)
with self.assertRaises(ModuleNotFoundError) as e:
n.xr_export('test.nc')
self.assertEqual(str(e.exception), "Please install 'xarray'")

@patch.object(exporter.xr.Dataset, 'to_netcdf')
def test_xr_export__default(self, mock_to_netcdf):
"""Test that the to_netcdf function is called for default
usage of xr_export."""
n = Nansat(self.test_file_arctic)
n.xr_export('test.nc')
mock_to_netcdf.assert_called_once_with('test.nc',
encoding={"longitude": {"_FillValue": None}, "latitude": {"_FillValue": None}})

def test_xr_export__one_band(self):
"""Test that only a given band is exported."""
n = Nansat(self.test_file_arctic)
fd, tmp_ncfile = tempfile.mkstemp(suffix='.nc')
n.xr_export(tmp_ncfile, bands=['Bootstrap'])
ds = Dataset(tmp_ncfile)
self.assertEqual(list(ds.variables.keys()), ['Bootstrap', 'longitude', 'latitude'])
os.close(fd)
os.unlink(tmp_ncfile)

def test_xr_export__with_specific_encoding_and_nan_values(self):
"""Test that a band with nan-values is masked as expected,
and with the FillValue specified by the user."""
n = Nansat(self.test_file_arctic)
xx = n['Bootstrap'].astype(float)
xx = np.ma.masked_where(
xx == float(n.get_metadata(band_id='Bootstrap', key='_FillValue')), xx)
xx.data[xx.mask] = np.nan
yy = xx.data
yy[2,:] = np.nan
n.add_band(yy, parameters={'name': 'test_band_with_nans'})
encoding = {
"longitude": {"_FillValue": None},
"latitude": {"_FillValue": None},
"test_band_with_nans": {"_FillValue": 999},
}

fd, tmp_ncfile = tempfile.mkstemp(suffix='.nc')
n.xr_export(tmp_ncfile, encoding=encoding)
ds = Dataset(tmp_ncfile)
# Check that original invalid/masked data equals 999
self.assertEqual(ds.variables['test_band_with_nans'][:].data[xx.mask][0], 999.)
# Check that data set to np.nan equals 999
self.assertEqual(ds.variables['test_band_with_nans'][:].data[2,0], 999.)

def test_geolocation_of_exportedNC_vs_original(self):
""" Lon/lat in original and exported file should coincide """
orig = Nansat(self.test_file_gcps, mapper=self.default_mapper)
Expand Down

0 comments on commit 3ca223e

Please sign in to comment.