sigmffile: separate data_offset and data_size
These elements are usually checked independently; separate them into two
arguments to facilitate checks.

Signed-off-by: Liam Beguin <[email protected]>
liambeguin committed Dec 24, 2023
1 parent acbbe45 commit ec715b2
Showing 3 changed files with 28 additions and 15 deletions.
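
In short, the packed `(offset, size)` tuple (`data_offset_size` in the archive reader, `offset_and_size` in `sigmffile.py` and `sigmf_hash.py`) becomes two values that are stored and checked independently. A minimal sketch of the difference at a call site, with made-up values rather than anything taken from the diff:

```python
# Old style: a packed tuple; a caller that needs only one element still
# has to None-check and unpack the whole pair.
data_offset_size = (512, 4096)
if data_offset_size is not None:
    offset, size_bytes = data_offset_size
    print(offset)

# New style: each value stands alone and can be checked on its own.
data_offset = 512
data_size_bytes = 4096
if data_offset is not None:
    print(data_offset)
if data_size_bytes is not None:
    print(data_size_bytes)
```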
20 changes: 14 additions & 6 deletions sigmf/archivereader.py
```diff
@@ -42,7 +42,8 @@ def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_buffer=None):
             raise ValueError('In sigmf.archivereader.__init__(), either `name` or `archive_buffer` must be not None')
 
         json_contents = None
-        data_offset_size = None
+        data_offset = None
+        data_size_bytes = None
 
         for memb in tar_obj.getmembers():
             if memb.isdir():  # memb.type == tarfile.DIRTYPE:
@@ -52,7 +53,7 @@ def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_buffer=None):
             elif memb.isfile():  # memb.type == tarfile.REGTYPE:
                 if memb.name.endswith(SIGMF_METADATA_EXT):
                     json_contents = memb.name
-                    if data_offset_size is None:
+                    if data_offset is None:
                         # consider a warnings.warn() here; the datafile should be earlier in the
                         # archive than the metadata, so that updating it (like, adding an annotation)
                         # is fast.
@@ -61,21 +62,28 @@ def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_buffer=None):
                         json_contents = memb_fid.read()
 
                 elif memb.name.endswith(SIGMF_DATASET_EXT):
-                    data_offset_size = memb.offset_data, memb.size
+                    data_offset = memb.offset_data
+                    data_size_bytes = memb.size
 
                 else:
                     print('A regular file', memb.name, 'was found but ignored in the archive')
             else:
                 print('A member of type', memb.type, 'and name', memb.name, 'was found but not handled, just FYI.')
 
-        if data_offset_size is None:
+        if data_offset is None:
             raise SigMFFileError('No .sigmf-data file found in archive!')
 
         self.sigmffile = SigMFFile(metadata=json_contents)
         valid_md = self.sigmffile.validate()
 
-        self.sigmffile.set_data_file(self.name, data_buffer=archive_buffer, skip_checksum=skip_checksum, offset=data_offset_size[0],
-                                     size_bytes=data_offset_size[1], map_readonly=map_readonly)
+        self.sigmffile.set_data_file(
+            self.name,
+            data_buffer=archive_buffer,
+            skip_checksum=skip_checksum,
+            offset=data_offset,
+            size_bytes=data_size_bytes,
+            map_readonly=map_readonly,
+        )
 
         self.ndim = self.sigmffile.ndim
         self.shape = self.sigmffile.shape
```
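
For context, `memb.offset_data` and `memb.size` come straight from Python's `tarfile` module: a SigMF archive is an ordinary tar file, and each `TarInfo` member records where its payload starts inside the archive and how many bytes it spans. A standalone sketch of the lookup the reader performs (the archive filename is hypothetical):

```python
import tarfile

# Hypothetical archive path; any SigMF archive (a plain tar file) would do.
with tarfile.open("recording.sigmf") as tar_obj:
    for memb in tar_obj.getmembers():
        if memb.isfile() and memb.name.endswith(".sigmf-data"):
            # offset_data is the byte position of this member's payload
            # within the archive; size is the payload length in bytes.
            # These become data_offset and data_size_bytes above.
            print(memb.name, memb.offset_data, memb.size)
```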
14 changes: 9 additions & 5 deletions sigmf/sigmf_hash.py
```diff
@@ -10,23 +10,27 @@
 import os
 
 
-def calculate_sha512(filename=None, fileobj=None, offset_and_size=None):
+def calculate_sha512(filename=None, fileobj=None, offset=None, size=None):
     """
     Return sha512 of file or fileobj.
     """
     the_hash = hashlib.sha512()
+    bytes_to_hash = size
+    bytes_read = 0
 
     if filename is not None:
         fileobj = open(filename, "rb")
-    if offset_and_size is None:
+    if size is None:
         bytes_to_hash = os.path.getsize(filename)
     else:
-        fileobj.seek(offset_and_size[0])
-        bytes_to_hash = offset_and_size[1]
-    bytes_read = 0
+        fileobj.seek(offset)
 
     while bytes_read < bytes_to_hash:
         buff = fileobj.read(min(4096, (bytes_to_hash - bytes_read)))
         the_hash.update(buff)
         bytes_read += len(buff)
 
     if filename is not None:
         fileobj.close()
 
     return the_hash.hexdigest()
```
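
With the split, the function's contract is easy to state: seek to `offset`, then hash exactly `size` bytes in 4096-byte chunks. A self-contained check of that behavior built on `io.BytesIO`, so nothing here depends on the sigmf package itself:

```python
import hashlib
import io

payload = bytes(range(256)) * 64  # 16 KiB standing in for sample data
offset, size = 512, 4096          # hash a 4 KiB window of it

# Chunked read, mirroring the loop in calculate_sha512().
fileobj = io.BytesIO(payload)
fileobj.seek(offset)
the_hash = hashlib.sha512()
bytes_read = 0
while bytes_read < size:
    buff = fileobj.read(min(4096, size - bytes_read))
    the_hash.update(buff)
    bytes_read += len(buff)

# The windowed hash equals hashing the slice directly.
assert the_hash.hexdigest() == hashlib.sha512(payload[offset:offset + size]).hexdigest()
```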
9 changes: 5 additions & 4 deletions sigmf/sigmffile.py
```diff
@@ -428,7 +428,7 @@ def _count_samples(self):
             sample_count = 0
         else:
             header_bytes = sum([c.get(self.HEADER_BYTES_KEY, 0) for c in self.get_captures()])
-            file_size = path.getsize(self.data_file) if self.offset_and_size is None else self.offset_and_size[1]
+            file_size = path.getsize(self.data_file) if self.data_size_bytes is None else self.data_size_bytes
             file_data_size = file_size - self.get_global_field(self.TRAILING_BYTES_KEY, 0) - header_bytes  # bytes
             sample_size = self.get_sample_size()  # size of a sample in bytes
             num_channels = self.get_num_channels()
```
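
This hunk is the main consumer of `data_size_bytes`: when it is set, it replaces the on-disk file size in the sample-count arithmetic. A worked example with made-up numbers; the final division happens below the visible context, so that step is an assumption here:

```python
# Made-up figures for a single-channel cf32_le recording (8 bytes/sample).
file_size = 4096       # data_size_bytes, or path.getsize() when it is None
trailing_bytes = 0     # core:trailing_bytes
header_bytes = 0       # sum of core:header_bytes over all captures
sample_size = 8        # bytes per sample for cf32_le
num_channels = 1

file_data_size = file_size - trailing_bytes - header_bytes     # as in the hunk
sample_count = file_data_size // (sample_size * num_channels)  # assumed next step
print(sample_count)  # 512
```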
```diff
@@ -449,9 +449,9 @@ def calculate_hash(self):
         """
         old_hash = self.get_global_field(self.HASH_KEY)
         if self.data_file is not None:
-            new_hash = sigmf_hash.calculate_sha512(self.data_file, offset_and_size=self.offset_and_size)
+            new_hash = sigmf_hash.calculate_sha512(self.data_file, offset=self.data_offset, size=self.data_size_bytes)
         else:
-            new_hash = sigmf_hash.calculate_sha512(fileobj=self.data_buffer, offset_and_size=self.offset_and_size)
+            new_hash = sigmf_hash.calculate_sha512(fileobj=self.data_buffer, offset=self.data_offset, size=self.data_size_bytes)
         if old_hash:
             if old_hash != new_hash:
                 raise SigMFFileError('Calculated file hash does not match associated metadata.')
```
```diff
@@ -469,7 +469,8 @@ def set_data_file(self, data_file=None, data_buffer=None, skip_checksum=False, offset=0, size_bytes=None, map_readonly=True):
 
         self.data_file = data_file
         self.data_buffer = data_buffer
-        self.offset_and_size = None if (offset == 0 and size_bytes is None) else (offset, size_bytes)
+        self.data_offset = offset
+        self.data_size_bytes = size_bytes
         self._count_samples()
 
         dtype = dtype_info(self.get_global_field(self.DATATYPE_KEY))
```
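
After this hunk, downstream code reads `data_offset` and `data_size_bytes` directly instead of unpacking a tuple, and `set_data_file()` no longer collapses the default `(0, None)` case to `None`. A hedged usage sketch; the filename is made up, and the keyword defaults (`offset=0`, `size_bytes=None`) are inferred from the deleted line:

```python
import numpy as np
from sigmf.sigmffile import SigMFFile

# Write a small throwaway dataset so the sketch actually runs.
np.zeros(512, dtype=np.complex64).tofile("recording.sigmf-data")

meta = SigMFFile(metadata={"global": {"core:datatype": "cf32_le", "core:version": "1.0.0"}})
meta.set_data_file("recording.sigmf-data", skip_checksum=True)

# The two values are now plain attributes, checkable independently.
print(meta.data_offset)      # 0 for a bare .sigmf-data file
print(meta.data_size_bytes)  # None unless an explicit size was given
```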
