Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix corruption of FITS tables with unsigned int columns on save and load #3680

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions jwst/combine_1d/combine1d.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,7 @@ def __init__(self, ms, spec, exptime_key):
self.surf_bright = np.zeros_like(self.flux)
self.sb_error = np.zeros_like(self.flux)
log.warning("There is no SURF_BRIGHT column in the input.")
# xxx temporary self.dq = spec.spec_table.field("dq").copy()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only the "# xxx temporary " part of this line should be deleted. The actual assignment to self.dq needs to be done.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh! Oops, thanks.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(this has been fixed)

# xxx This is a workaround which should be deleted after the
# bug described in JP-789, GitHub issues #3655 and #3179 have
# been fixed.
self.dq = np.zeros(self.wavelength.shape, dtype=np.uint32) # xxx
self.dq = spec.spec_table.field("dq").copy()
self.nelem = self.wavelength.shape[0]
self.unit_weight = False # may be reset below
self.right_ascension = np.zeros_like(self.wavelength)
Expand Down
4 changes: 4 additions & 0 deletions jwst/datamodels/properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,12 @@ def _as_fitsrec(val):
return val
else:
coldefs = fits.ColDefs(val)
uint = any(c._pseudo_unsigned_ints for c in coldefs)
fits_rec = fits.FITS_rec(val)
fits_rec._coldefs = coldefs
# FITS_rec needs to know if it should be operating in pseudo-unsigned-ints mode,
# otherwise it won't properly convert integer columns with TZEROn before saving.
fits_rec._uint = uint
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this used in astropy.io.fits?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, here. The else... is where it was changing the dtype to float64.

return fits_rec


Expand Down
59 changes: 59 additions & 0 deletions jwst/datamodels/tests/test_fits.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,65 @@ def test_replace_table():
assert hdulist[1].header['TFORM2'] == 'D'


def test_table_with_unsigned_int():
schema = {
'title': 'Test data model',
'$schema': 'http://stsci.edu/schemas/fits-schema/fits-schema',
'type': 'object',
'properties': {
'meta': {
'type': 'object',
'properties': {}
},
'test_table': {
'title': 'Test table',
'fits_hdu': 'TESTTABL',
'datatype': [
{'name': 'FLOAT64_COL', 'datatype': 'float64'},
{'name': 'UINT32_COL', 'datatype': 'uint32'}
]
}
}
}

dm = DataModel(schema=schema)

float64_info = np.finfo(np.float64)
float64_arr = np.random.uniform(size=(10,))
float64_arr[0] = float64_info.min
float64_arr[-1] = float64_info.max

uint32_info = np.iinfo(np.uint32)
uint32_arr = np.random.randint(uint32_info.min, uint32_info.max + 1, size=(10,), dtype=np.uint32)
uint32_arr[0] = uint32_info.min
uint32_arr[-1] = uint32_info.max

test_table = np.array(list(zip(float64_arr, uint32_arr)), dtype=dm.test_table.dtype)

def assert_table_correct(model):
for idx, (col_name, col_data) in enumerate([('float64_col', float64_arr), ('uint32_col', uint32_arr)]):
# The table dtype and field dtype are stored separately, and may not
# necessarily agree.
assert np.can_cast(model.test_table.dtype[idx], col_data.dtype, 'equiv')
assert np.can_cast(model.test_table.field(col_name).dtype, col_data.dtype, 'equiv')
assert np.array_equal(model.test_table.field(col_name), col_data)

# The datamodel casts our array to FITS_rec on assignment, so here we're
# checking that the data survived the casting.
dm.test_table = test_table
assert_table_correct(dm)

# Confirm that saving the table (and converting the uint32 values to signed int w/TZEROn)
# doesn't mangle the data.
dm.save(TMP_FITS)
assert_table_correct(dm)

# Confirm that the data loads from the file intact (converting the signed ints back to
# the appropriate uint32 values).
dm2 = DataModel(TMP_FITS, schema=schema)
assert_table_correct(dm2)


def test_metadata_from_fits():
from astropy.io import fits

Expand Down
11 changes: 11 additions & 0 deletions jwst/datamodels/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,17 @@ def gentle_asarray(a, dtype):
else:
return np.asanyarray(a, dtype=out_dtype)
elif in_dtype.fields is not None and out_dtype.fields is not None:
# When a FITS file includes a pseudo-unsigned-int column, astropy will return
# a FITS_rec with an incorrect table dtype. The following code rebuilds
# in_dtype from the individual field dtypes, which are all correct.
# We can remove this once the issue is resolved in astropy:
# https://github.com/astropy/astropy/issues/8862
if isinstance(a, fits.fitsrec.FITS_rec):
new_in_dtype = []
for field_name in in_dtype.fields:
new_in_dtype.append((field_name, a.field(field_name).dtype))
in_dtype = np.dtype(new_in_dtype)

if in_dtype == out_dtype:
return a
in_names = {n.lower() for n in in_dtype.names}
Expand Down