Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modified export function to avoid change of attribute names, and added test about fill values #526

Closed
wants to merge 21 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
56996cc
test that history metadata is added correctly
mortenwh Oct 23, 2020
7e5a1f6
updated baseURLs to s1 thredds
mortenwh Jun 11, 2021
fa71c83
Merge branch 'nansencenter:master' into master
mortenwh Jun 11, 2021
9677223
Merge branch 'nansencenter:master' into master
mortenwh Aug 12, 2021
fceb4b5
removed print line which hampers readability - the line was printed e…
mortenwh Jan 26, 2022
6d715b9
added exception handling of value error
mortenwh Jan 26, 2022
68c01b7
Merge branch 'master' of github.com:nansencenter/nansat
mortenwh Jan 26, 2022
1a53f4b
use != instead of is not. Mappername default changed to None would be…
mortenwh Jan 26, 2022
f754a8a
Merge branch 'master' of github.com:mortenwh/nansat
mortenwh Jan 26, 2022
d90e2e3
This is not an error. Might be a warning but it is still annoying, so…
mortenwh Jan 27, 2022
18fc477
Merge remote-tracking branch 'nersc/master'
mortenwh Sep 16, 2022
365ec55
Merge remote-tracking branch 'nersc/master'
mortenwh Dec 22, 2022
21e4334
#525: added function to export with xarray, some cleaning, and tests
mortenwh Dec 23, 2022
0ca6093
#525: ipdb lines were not meant to be committed..
mortenwh Dec 23, 2022
d1be8e9
#525: removed ipdb lines
mortenwh Dec 23, 2022
110789c
#525: removed xarray based export function and modified the export fu…
mortenwh Dec 25, 2022
2c45363
#525: removed unnecessary test
mortenwh Dec 25, 2022
d4d6495
#525: cleaned test code and added one test to demonstrate issue when …
mortenwh Dec 25, 2022
f276d0e
#525: static methods
mortenwh Dec 26, 2022
a671dc8
#525: adjusted netcdf-cf mapper to account for new attribute names wi…
mortenwh Dec 27, 2022
54f17ab
#525: gdal adds its own Conventions attribute with value 'CF-1.5' whi…
mortenwh Jan 4, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 76 additions & 9 deletions nansat/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import tempfile
import datetime
import warnings
import importlib
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove


from nansat.utils import gdal
import numpy as np
Expand All @@ -30,17 +31,22 @@

from nansat.exceptions import NansatGDALError

try:
import xarray as xr
except:
warnings.warn("'xarray' needs to be installed for Exporter.xr_export to work.")

Comment on lines +34 to +38
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove


class Exporter(object):
"""Abstract class for export functions """
DEFAULT_INSTITUTE = 'NERSC'
DEFAULT_SOURCE = 'satellite remote sensing'

UNWANTED_METADATA = ['dataType', 'SourceFilename', 'SourceBand', '_Unsigned', 'FillValue',
'time', '_FillValue', 'type', 'scale', 'offset']
'_FillValue', 'type', 'scale', 'offset', 'NETCDF_VARNAME']

def export(self, filename='', bands=None, rm_metadata=None, add_geolocation=True,
driver='netCDF', options=None, hardcopy=False):
driver='netCDF', options='FORMAT=NC4', hardcopy=False):
"""Export Nansat object into netCDF or GTiff file

Parameters
Expand All @@ -57,7 +63,8 @@ def export(self, filename='', bands=None, rm_metadata=None, add_geolocation=True
add geolocation array datasets to exported file?
driver : str
Name of GDAL driver (format)
options : str or list
options : str or list (default: 'FORMAT=NC4' for NetCDF 4
file format)
GDAL export options in format of: 'OPT=VAL', or
['OPT1=VAL1', 'OP2='VAL2']
See also http://www.gdal.org/frmt_netcdf.html
Expand All @@ -70,11 +77,6 @@ def export(self, filename='', bands=None, rm_metadata=None, add_geolocation=True

Notes
------
If number of bands is more than one, serial numbers are added at the end of each band name.
It is possible to fix it by changing line.4605 in GDAL/frmts/netcdf/netcdfdataset.cpp :
'if( nBands > 1 ) sprintf(szBandName,"%s%d",tmpMetadata,iBand);'
--> 'if( nBands > 1 ) sprintf(szBandName,"%s",tmpMetadata);'

CreateCopy fails in case the band name has special characters,
e.g. the slash in 'HH/VV'.

Expand Down Expand Up @@ -116,14 +118,79 @@ def export(self, filename='', bands=None, rm_metadata=None, add_geolocation=True
add_gcps = export_vrt.prepare_export_netcdf()

# Create output file using GDAL
dataset = gdal.GetDriverByName(driver).CreateCopy(filename, export_vrt.dataset, options=options)
dataset = gdal.GetDriverByName(driver).CreateCopy(filename, export_vrt.dataset,
options=options)
del dataset
# add GCPs into netCDF file as separate float variables
if add_gcps:
Exporter._add_gcps(filename, export_vrt.dataset.GetGCPs())

if driver=='netCDF':
# Rename variable names to get rid of the band numbers
self.rename_variables(filename)
# Rename attributes to get rid of "GDAL_" added by gdal
self.rename_attributes(filename)

self.logger.debug('Export - OK!')

@staticmethod
def rename_attributes(filename):
    """Strip the "GDAL_" prefix from global attribute names.

    Every global attribute whose name starts with "GDAL_" is renamed
    to the same name without that prefix. If the unprefixed name
    already exists as a global attribute, the existing one is deleted
    first, so the value of the "GDAL_"-prefixed attribute is kept.

    Parameters
    ----------
    filename : str
        NetCDF file name. The file is modified in place.
    """
    prefix = "GDAL_"
    # The context manager guarantees that the file is closed even if
    # one of the attribute operations below raises.
    with Dataset(filename, 'r+') as ds:
        existing = ds.ncattrs()

        # Map each prefixed attribute to its new name. Only a leading
        # "GDAL_" is stripped; an attribute that merely contains the
        # string somewhere else in its name is left untouched.
        renames = {}
        for attr in existing:
            if not attr.startswith(prefix):
                continue
            target = attr[len(prefix):]
            if target:
                # An attribute named exactly "GDAL_" has no sensible
                # new name and is left as it is.
                renames[attr] = target

        # The netcdf driver adds the Conventions attribute with value
        # CF-1.5. This may be wrong, so it is better to use the
        # Conventions metadata from the Nansat object. Other
        # attributes added by gdal that are already present in Nansat
        # should also be deleted.
        for target in renames.values():
            if target in existing:
                ds.delncattr(target)

        # Rename once all clashing attributes are gone.
        for attr, target in renames.items():
            ds.renameAttribute(attr, target)

@staticmethod
def rename_variables(filename):
    """Rename variables to the value of their own "name" attribute.

    A variable is renamed only if it has a "name" attribute and the
    value of that attribute differs from the current variable name.

    Parameters
    ----------
    filename : str
        NetCDF file name. The file is modified in place.
    """
    # The context manager guarantees that the file is closed even if
    # a rename fails.
    with Dataset(filename, 'r+') as ds:
        # Decide which variables to rename before touching anything,
        # so that ds.variables is not modified while it is iterated.
        renames = {}
        for var_name, variable in ds.variables.items():
            if 'name' not in variable.ncattrs():
                continue
            new_name = variable.getncattr('name')
            if new_name != var_name:
                renames[var_name] = new_name

        # Rename selected variables
        for var_name, new_name in renames.items():
            ds.renameVariable(var_name, new_name)

def export2thredds(self,
filename,
bands=None,
Expand Down
2 changes: 1 addition & 1 deletion nansat/mappers/mapper_globcolour_l3m.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def __init__(self, filename, gdalDataset, gdalMetadata, **kwargs):
''' GLOBCOLOR L3M VRT '''

try:
print("=>%s<=" % gdalMetadata['NC_GLOBAL#title'])
x = "=>%s<=" % gdalMetadata['NC_GLOBAL#title']
except (TypeError, KeyError):
raise WrongMapperError

Expand Down
9 changes: 6 additions & 3 deletions nansat/mappers/mapper_netcdf_cf.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,10 @@ def __init__(self, filename, gdal_dataset, gdal_metadata, *args, **kwargs):
raise WrongMapperError

if 'NC_GLOBAL#GDAL_NANSAT_GCPY_000' in list(gdal_metadata.keys()) or \
'NC_GLOBAL#GDAL_NANSAT_GCPProjection' in list(gdal_metadata.keys()):
# Probably Nansat generated netcdf of swath data - see issue #192
'NC_GLOBAL#GDAL_NANSAT_GCPProjection' in list(gdal_metadata.keys()) or \
'NC_GLOBAL#NANSAT_GCPY_000' in list(gdal_metadata.keys()) or \
'NC_GLOBAL#NANSAT_GCPProjection' in list(gdal_metadata.keys()):
# Nansat generated netcdf of swath data is not standard CF
raise WrongMapperError

metadata = VRT._remove_strings_in_metadata_keys(gdal_metadata,
Expand Down Expand Up @@ -277,6 +279,7 @@ def get_band_number():
subds = gdal.Open(fn)
band = subds.GetRasterBand(Context.band_number)
band_metadata = self._clean_band_metadata(band)
band_metadata['_FillValue'] = band.GetNoDataValue()

return self._band_dict(fn, Context.band_number, subds, band=band,
band_metadata=band_metadata)
Expand Down Expand Up @@ -317,7 +320,7 @@ def _band_dict(self, subfilename, band_num, subds, band=None, band_metadata=None
try:
band_metadata['time_iso_8601'] = self._time_count_to_np_datetime64(
band_metadata[timecountname])
except KeyError as e:
except (ValueError, KeyError) as e:
# No timing information available for this band - it is
# probably a constant, such as land area fraction or similar.
# Then we don't need time for this band...
Expand Down
2 changes: 2 additions & 0 deletions nansat/mappers/mapper_opendap_sentinel1.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
class Mapper(Opendap, Sentinel1):

baseURLs = [
'https://nbstds.met.no/thredds/dodsC/NBS/S1A',
'https://nbstds.met.no/thredds/dodsC/NBS/S1B',
'http://nbstds.met.no/thredds/dodsC/NBS/S1A',
'http://nbstds.met.no/thredds/dodsC/NBS/S1B',
]
Expand Down
11 changes: 2 additions & 9 deletions nansat/nansat.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,6 @@ class Nansat(Domain, Exporter):

"""

FILL_VALUE = 9.96921e+36
ALT_FILL_VALUE = -10000.

# instance attributes
logger = None
filename = None
Expand Down Expand Up @@ -265,10 +262,6 @@ def _fill_with_nan(self, band, band_data):
"""Fill input array with fill value taen from input band metadata"""
fill_value = float(band.GetMetadata()['_FillValue'])
band_data[band_data == fill_value] = np.nan
# quick hack to avoid problem with wrong _FillValue - see issue
# #123
if fill_value == self.FILL_VALUE:
band_data[band_data == self.ALT_FILL_VALUE] = np.nan

return band_data

Expand Down Expand Up @@ -1067,7 +1060,7 @@ def _get_dataset_metadata(self):
try:
gdal_dataset = gdal.Open(self.filename)
except RuntimeError:
self.logger.error('GDAL could not open %s, trying to read with Nansat mappers...'
self.logger.debug('GDAL could not open %s, trying to read with Nansat mappers...'
% self.filename)
if gdal_dataset is not None:
# get metadata from the GDAL dataset
Expand Down Expand Up @@ -1122,7 +1115,7 @@ def _get_mapper(self, mappername, **kwargs):
tmp_vrt = None

# TODO: There seems to be code repetition in this if-test - should be avoided...
if mappername is not '':
if mappername != '':
# If a specific mapper is requested, we test only this one.
# get the module name
mappername = 'mapper_' + mappername.replace('mapper_', '').replace('.py', '').lower()
Expand Down
61 changes: 59 additions & 2 deletions nansat/tests/test_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@

from netCDF4 import Dataset

from nansat import Nansat, Domain, NSR
from nansat import Nansat, Domain, NSR, exporter
from nansat.utils import gdal
from nansat.tests.nansat_test_base import NansatTestBase

Expand All @@ -46,6 +46,61 @@

class ExporterTest(NansatTestBase):

def test_export__with_nan_values(self):
    """Test that a band with nan-values is masked as expected,
    that global attribute names are the same as the Nansat
    metadata, and that variable names are the same as the Nansat
    band names without appended numbers."""
    n = Nansat(self.test_file_arctic)
    fill_value = float(n.get_metadata(band_id='Bootstrap', key='_FillValue'))
    aa = n['Bootstrap'].astype(float)
    aa = np.ma.masked_where(aa == fill_value, aa)
    aa.data[aa.mask] = np.nan
    bb = aa.data.copy()
    bb[2, :] = np.nan
    n.add_band(bb, parameters={'name': 'test_band_with_nans'})

    # Number of nan elements in the band that was added
    cc = np.zeros(bb.shape)
    cc[np.isnan(bb)] = 1
    sumnan = cc.sum()

    # temp file for exported netcdf; only the name is needed, so the
    # descriptor is closed right away
    fd, tmp_ncfile = tempfile.mkstemp(suffix='.nc')
    os.close(fd)
    try:
        # export with nansat
        n.export(tmp_ncfile)

        # Read everything needed from the exported file and close it
        # again before asserting, so that a failing assertion cannot
        # leave the dataset open.
        with Dataset(tmp_ncfile) as ds:
            dd = ds.variables['test_band_with_nans'][:]
            ncattrs = ds.ncattrs()
            variable_names = list(ds.variables.keys())

        # Check that the number of elements in bb and the new file
        # equal to np.nan is the same when opened with netCDF4.Dataset:
        ee = np.zeros(dd.shape)
        ee[np.isnan(dd)] = 1
        self.assertEqual(ee.sum(), sumnan)

        # Check that the global metadata attribute names are the same
        for attr in n.get_metadata().keys():
            self.assertIn(attr, ncattrs)

        # Check that variable names don't contain band numbers
        self.assertEqual(variable_names, ['polar_stereographic', 'x', 'y',
                         'UMass_AES', 'Bootstrap', 'Bristol', 'test_band_with_nans'])

        # Open the tmp file using nansat, and check that the Bootstrap
        # band from the tmp file is the same as the one in the original
        exported = Nansat(tmp_ncfile)
        ff = exported['Bootstrap']
        ff = np.ma.masked_where(
            ff == float(exported.get_metadata(band_id='Bootstrap', key='_FillValue')), ff)
        # This only works when the fill value (-10000) in arctic.nc is
        # treated correctly (with python3.9 and GDAL3.4.1)
        self.assertEqual(ff.mask[0, 239], aa.mask[0, 239])
    finally:
        # Remove the temp file also when an assertion above fails
        os.unlink(tmp_ncfile)

def test_geolocation_of_exportedNC_vs_original(self):
""" Lon/lat in original and exported file should coincide """
orig = Nansat(self.test_file_gcps, mapper=self.default_mapper)
Expand Down Expand Up @@ -409,7 +464,9 @@ def test_example1(self):

def test_example2(self):
    """export2thredds called with a per-band metadata dict returns None."""
    n = Nansat(self.tmp_ncfile)
    # Export exactly once; the one-line form of this call was a
    # duplicate of the wrapped call below.
    res = n.export2thredds(
        self.filename_exported,
        {'x_wind_10m': {'description': 'example'}})
    self.assertEqual(res, None)

def test_example3(self):
Expand Down