From 5e38f0e6356e241215b6f39c73fc7febc9e82b91 Mon Sep 17 00:00:00 2001 From: Ed Slavich Date: Mon, 17 Jun 2019 13:42:07 -0400 Subject: [PATCH 1/4] Fix corruption of FITS tables with unsigned int columns on save and load --- jwst/combine_1d/combine1d.py | 5 --- jwst/datamodels/properties.py | 4 ++ jwst/datamodels/tests/test_fits.py | 61 +++++++++++++++++++++++++++++- jwst/datamodels/util.py | 11 ++++++ 4 files changed, 75 insertions(+), 6 deletions(-) diff --git a/jwst/combine_1d/combine1d.py b/jwst/combine_1d/combine1d.py index 303be25366..587604ff9c 100644 --- a/jwst/combine_1d/combine1d.py +++ b/jwst/combine_1d/combine1d.py @@ -58,11 +58,6 @@ def __init__(self, ms, spec, exptime_key): self.surf_bright = np.zeros_like(self.flux) self.sb_error = np.zeros_like(self.flux) log.warning("There is no SURF_BRIGHT column in the input.") - # xxx temporary self.dq = spec.spec_table.field("dq").copy() - # xxx This is a workaround which should be deleted after the - # bug described in JP-789, GitHub issues #3655 and #3179 have - # been fixed. - self.dq = np.zeros(self.wavelength.shape, dtype=np.uint32) # xxx self.nelem = self.wavelength.shape[0] self.unit_weight = False # may be reset below self.right_ascension = np.zeros_like(self.wavelength) diff --git a/jwst/datamodels/properties.py b/jwst/datamodels/properties.py index e0e7c305bb..e25173aa64 100644 --- a/jwst/datamodels/properties.py +++ b/jwst/datamodels/properties.py @@ -56,8 +56,12 @@ def _as_fitsrec(val): return val else: coldefs = fits.ColDefs(val) + uint = any(c._pseudo_unsigned_ints for c in coldefs) fits_rec = fits.FITS_rec(val) fits_rec._coldefs = coldefs + # FITS_rec needs to know if it should be operating in pseudo-unsigned-ints mode, + # otherwise it won't properly convert integer columns with TZEROn before saving. + fits_rec._uint = uint return fits_rec diff --git a/jwst/datamodels/tests/test_fits.py b/jwst/datamodels/tests/test_fits.py index 1a790442d0..c6e2e70353 100644 --- a/jwst/datamodels/tests/test_fits.py +++ b/jwst/datamodels/tests/test_fits.py @@ -1,6 +1,7 @@ import os import shutil import tempfile +from collections import OrderedDict import pytest @@ -398,6 +399,65 @@ def test_replace_table(): assert hdulist[1].header['TFORM2'] == 'D' +def test_table_with_unsigned_int(): + schema = { + 'title': 'Test data model', + '$schema': 'http://stsci.edu/schemas/fits-schema/fits-schema', + 'type': 'object', + 'properties': { + 'meta': { + 'type': 'object', + 'properties': {} + }, + 'test_table': { + 'title': 'Test table', + 'fits_hdu': 'TESTTABL', + 'datatype': [ + {'name': 'FLOAT64_COL', 'datatype': 'float64'}, + {'name': 'UINT32_COL', 'datatype': 'uint32'} + ] + } + } + } + + dm = DataModel(schema=schema) + + float64_info = np.finfo(np.float64) + float64_arr = np.random.uniform(size=(10,)) + float64_arr[0] = float64_info.min + float64_arr[-1] = float64_info.max + + uint32_info = np.iinfo(np.uint32) + uint32_arr = np.random.randint(uint32_info.min, uint32_info.max + 1, size=(10,), dtype=np.uint32) + uint32_arr[0] = uint32_info.min + uint32_arr[-1] = uint32_info.max + + test_table = np.array(list(zip(float64_arr, uint32_arr)), dtype=dm.test_table.dtype) + + def assert_table_correct(model): + for idx, (col_name, col_data) in enumerate([('float64_col', float64_arr), ('uint32_col', uint32_arr)]): + # The table dtype and field dtype are stored separately, and may not + # necessarily agree. + assert np.can_cast(model.test_table.dtype[idx], col_data.dtype, 'equiv') + assert np.can_cast(model.test_table.field(col_name).dtype, col_data.dtype, 'equiv') + assert np.array_equal(model.test_table.field(col_name), col_data) + + # The datamodel casts our array to FITS_rec on assignment, so here we're + # checking that the data survived the casting. + dm.test_table = test_table + assert_table_correct(dm) + + # Confirm that saving the table (and converting the uint32 values to signed int w/TZEROn) + # doesn't mangle the data. + dm.save(TMP_FITS) + assert_table_correct(dm) + + # Confirm that the data loads from the file intact (converting the signed ints back to + # the appropriate uint32 values). + dm2 = DataModel(TMP_FITS, schema=schema) + assert_table_correct(dm2) + + def test_metadata_from_fits(): from astropy.io import fits @@ -409,7 +469,6 @@ def test_metadata_from_fits(): with fits.open(TMP_FITS2) as hdulist: assert hdulist[2].name == 'ASDF' - # def test_float_as_int(): # from astropy.io import fits diff --git a/jwst/datamodels/util.py b/jwst/datamodels/util.py index e2fcd7456a..092a51c019 100644 --- a/jwst/datamodels/util.py +++ b/jwst/datamodels/util.py @@ -316,6 +316,17 @@ def gentle_asarray(a, dtype): else: return np.asanyarray(a, dtype=out_dtype) elif in_dtype.fields is not None and out_dtype.fields is not None: + # When a FITS file includes a pseudo-unsigned-int column, astropy will return + # a FITS_rec with an incorrect table dtype. The following code rebuilds + # in_dtype from the individual field dtypes, which are all correct. + # We can remove this once the issue is resolved in astropy: + # https://github.com/astropy/astropy/issues/8862 + if isinstance(a, fits.fitsrec.FITS_rec): + new_in_dtype = [] + for field_name in in_dtype.fields: + new_in_dtype.append((field_name, a.field(field_name).dtype)) + in_dtype = np.dtype(new_in_dtype) + if in_dtype == out_dtype: return a in_names = {n.lower() for n in in_dtype.names} From 7374f6bf3ac878af272c91a02ceed4fc6e567dfa Mon Sep 17 00:00:00 2001 From: Ed Slavich Date: Mon, 17 Jun 2019 14:01:55 -0400 Subject: [PATCH 2/4] Remove unnecessary import --- jwst/datamodels/tests/test_fits.py | 1 - 1 file changed, 1 deletion(-) diff --git a/jwst/datamodels/tests/test_fits.py b/jwst/datamodels/tests/test_fits.py index c6e2e70353..9886158a3d 100644 --- a/jwst/datamodels/tests/test_fits.py +++ b/jwst/datamodels/tests/test_fits.py @@ -1,7 +1,6 @@ import os import shutil import tempfile -from collections import OrderedDict import pytest From 70f77ea802d4dc88fd838050a988046bd57c2575 Mon Sep 17 00:00:00 2001 From: Ed Slavich Date: Mon, 17 Jun 2019 14:03:37 -0400 Subject: [PATCH 3/4] Remove another silly diff --- jwst/datamodels/tests/test_fits.py | 1 + 1 file changed, 1 insertion(+) diff --git a/jwst/datamodels/tests/test_fits.py b/jwst/datamodels/tests/test_fits.py index 9886158a3d..ec68edc94a 100644 --- a/jwst/datamodels/tests/test_fits.py +++ b/jwst/datamodels/tests/test_fits.py @@ -468,6 +468,7 @@ def test_metadata_from_fits(): with fits.open(TMP_FITS2) as hdulist: assert hdulist[2].name == 'ASDF' + # def test_float_as_int(): # from astropy.io import fits From 76f94f6b891adeb24a601faeed240037bae5d273 Mon Sep 17 00:00:00 2001 From: Ed Slavich Date: Mon, 17 Jun 2019 15:27:45 -0400 Subject: [PATCH 4/4] Don't delete the good parts --- jwst/combine_1d/combine1d.py | 1 + 1 file changed, 1 insertion(+) diff --git a/jwst/combine_1d/combine1d.py b/jwst/combine_1d/combine1d.py index 587604ff9c..22cbf0c371 100644 --- a/jwst/combine_1d/combine1d.py +++ b/jwst/combine_1d/combine1d.py @@ -58,6 +58,7 @@ def __init__(self, ms, spec, exptime_key): self.surf_bright = np.zeros_like(self.flux) self.sb_error = np.zeros_like(self.flux) log.warning("There is no SURF_BRIGHT column in the input.") + self.dq = spec.spec_table.field("dq").copy() self.nelem = self.wavelength.shape[0] self.unit_weight = False # may be reset below self.right_ascension = np.zeros_like(self.wavelength)