From 71c55835a564a85df0936c895d84a7f01e5b05b4 Mon Sep 17 00:00:00 2001 From: Kenneth Macdonald Date: Fri, 14 Aug 2020 10:16:32 -0400 Subject: [PATCH 01/47] Adding basic edit command to asdftool. --- asdf/commands/__init__.py | 3 +- asdf/commands/edit.py | 300 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 302 insertions(+), 1 deletion(-) create mode 100644 asdf/commands/edit.py diff --git a/asdf/commands/__init__.py b/asdf/commands/__init__.py index 7e8e41cc8..aa70f659b 100644 --- a/asdf/commands/__init__.py +++ b/asdf/commands/__init__.py @@ -7,9 +7,10 @@ from .tags import list_tags from .extension import find_extensions from .info import info +from .edit import edit -__all__ = ['implode', 'explode', 'to_yaml', 'defragment', 'diff', 'list_tags', +__all__ = ['edit', 'implode', 'explode', 'to_yaml', 'defragment', 'diff', 'list_tags', 'find_extensions', 'info'] diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py new file mode 100644 index 000000000..759f40e77 --- /dev/null +++ b/asdf/commands/edit.py @@ -0,0 +1,300 @@ +""" +Contains commands for dealing with exploded and imploded forms. +""" + + +import os +import sys + +import asdf +from .main import Command +from .. import AsdfFile + + +__all__ = ['edit'] + + +class ToYaml(Command): + @classmethod + def setup_arguments(cls, subparsers): + desc_string = "Allows for easy editing of the YAML in an ASDF file. " \ + "For edit mode, the YAML portion of an ASDF file is" \ + "separated from the ASDF into a text file for easy" \ + "editing. For save mode, the edited text file is written" \ + "to its ASDF file." + + parser = subparsers.add_parser( + str("edit"), help="Edit YAML portion of an ASDF file.", + description=desc_string) + + parser.add_argument( + '--infile', '-f', type=str, required=True, dest='fname', + help="Input file") + + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument( + '-s',action='store_true',dest='save', + help="Saves a YAML text file to its ASDF file. Requires an ASDF input file.") + group.add_argument( + '-e',action='store_true',dest='edit', + help="Create a YAML text file for a ASDF file. Requires a YAML input file.") + + parser.set_defaults(func=cls.run) + + return parser + + @classmethod + def run(cls, args): + return edit(args) + +def is_yaml_file ( fname ) : + ''' + Determines if a file is a YAML file based only on the file extension. + + Parameters + ---------- + fname : The input file name. + ''' + + base, ext = os.path.splitext(fname) + if '.yaml' != ext : + return False + return True + +def is_asdf_file ( fname ) : + ''' + Determines if a file is ASDF based on file extension and the first + 5 bytes of the file, which should be '#ASDF'. + + Parameters + ---------- + fname : The input file name. + ''' + + base, ext = os.path.splitext(fname) + if '.asdf' != ext : + return False + + with open(fname,"r+b") as fd : + first_string = "#ASDF" + first_line = fd.read(len(first_string)).decode('utf-8') + if first_string != first_line : + return False + + return True + +def get_yaml ( fname, return_yaml=False ) : + ''' + Reads all bytes from an ASDF file up to the '\n...\n' delimiter, which + separates the YAML text from the binary data. The location of the + first byte of the delimiter is alwyas returned. When requested, the + YAML text is also, returned. + + Parameters + ---------- + fname : The input file name. + return_yaml : the boolean flag to return the YAML text + ''' + + chunk_size = 1024 # Arbitrary chunk to read + dstart = b'\x0a\x2e\x2e\x2e\x0a' # The binary data delimiter - '\n...\n' + dlen = len(dstart) # Length of binary delimiter + + with open(fname,"r+b") as fd : + dfound = False # No data found, yet + fbytes = fd.read(chunk_size) + chunk_cnt = 0 + chunk_start = 0 + chunk_end = chunk_start + chunk_size - dlen + while not dfound : + for k in range(chunk_start,chunk_end) : + if dstart==fbytes[k:k+dlen] : # Check for the data delimiter + dfound = True + if return_yaml : + return k, fbytes[:k].decode('utf-8') + else : + return k, '' + chunk_cnt = chunk_cnt + 1 # Count the number of chunks read + cbytes = fd.read(chunk_size) + if cbytes is None : + return -1, '' # EOF without finding delimiter + fbytes += cbytes # Save all bytes read + chunk_start = chunk_cnt * chunk_size - dlen + chunk_end = chunk_start + chunk_size + + return -1, '' # EOF without finding delimiter + +def get_yaml_name ( fname ) : + ''' + Using the base ASDF name, create a corresponding YAML file name. + + Parameters + ---------- + fname : The input file name. + ''' + base, ext = os.path.splitext(fname) + return base + '.yaml' + +def get_asdf_name ( fname ) : + ''' + Using the base YAML name, create a corresponding ASDF file name. + + Parameters + ---------- + fname : The input file name. + ''' + base, ext = os.path.splitext(fname) + return base + '.asdf' + +def edit_func ( fname ) : + """ + Creates a YAML file from an ASDF file. The YAML file will contain only the + YAML from the ASDF file. The YAML text will be written to a YAML text file + in the same, so from 'example.asdf' the file 'example.yaml' will be created. + + Parameters + ---------- + fname : The input file name. + """ + + # TODO - validate an ASDF file + fullpath = os.path.abspath(fname) + if not is_asdf_file(fullpath) : + print(f"To use the '-e' option, as ASDF file must be inputted.") + print(f"The file is not an ASDF: \n'{fullpath}'\n") + return False + + # Get YAML from ASDF and its end location in the YAML file + loc, yaml_string = get_yaml(fullpath,return_yaml=True) + if -1==loc : + print(f"Could not find the YAML of '{fullpath}'",file=sys.stderr) + sys.exit(1) + + # Open YAML file + fullyaml = get_yaml_name(fullpath) + + # Write all YAML from ASDF to YAML + with open(fullyaml,"w") as fd : + fd.write(f"{yaml_string}") + # Tell user + delim = '*' * 65 + print(f"{delim}") + print(f"A YAML text file has been created at:\n'{fullyaml}'\n") + print("Edit this file in any text editor, then run the following command") + print("to save YAML edits to the ASDF file:\n") + print(f"'asdftool edit -s --infile {fullyaml}") + print(f"\n{delim}") + + +def get_yaml_with_no_trailing_whitespace ( yamlpath ) : + ''' + Get the YAML text from an ASDF file and remove any trailing whitespace. + + Parameters + ---------- + fname : The input YAML file. + ''' + with open(yamlpath,"r") as fd : + yaml = fd.read() + return yaml.rstrip() + + return '' + +def save_func ( fname ) : + """ + Checks to makes sure a corresponding ASDF file exists. This is done by + seeing if a file of the same name with '.asdf' as an extension exists. + Checks to makes sure fname is a valid YAML file. + If the YAML text is smaller than the YAML text in the ASDF file + overwrite the YAML in the ASDF file. + If the YAML text is smaller than the YAML text in the ASDF file + If the file is small, then rewrite file. + If the file is large, ask if rewrite is desired. + + Parameters + ---------- + fname : The input YAML file. + """ + _1G = 1000**3 # 1 gig + C = 1 # constant multiple of gig + SMALL_FILE_SIZE = C * _1G + + fullpath = os.path.abspath(fname) + fullasdf = get_asdf_name(fullpath) + if not is_yaml_file(fullpath) : # Validate YAML file + print(f"To use the '-s' option, as YAML fle must be inputted.") + print(f"The file is not a YAML: \n'{fullpath}'\n") + return False + + # Check to see if a corresponding ASDF file exists + if not os.path.exists(fullasdf) : + print(f"Error: ASDF file does not exist '{fullasdf}'",file=sys.stderr) + + # Find end of YAML in ASDF + loc, yaml_string = get_yaml(fullasdf,return_yaml=False) + if -1==loc : + print(f"Could not find the YAML of '{fullasdf}'",file=sys.stderr) + sys.exit(1) + + # Read YAML + yaml = get_yaml_with_no_trailing_whitespace(fullpath) + yaml_bytes = bytes(yaml,'utf-8') + + # TODO - validate YAML format and schema (maybe do this else where) + + # If larger than YAML in ASDF + # TODO - Investigate python module fileinput + #print(f"loc = {loc}, len(yaml) = {len(yaml)}") + if loc == len(yaml_bytes) : + #with open(fullasdf,"w") as fd : + with open(fullasdf,"r+b") as fd : + fd.write(yaml_bytes) + print("Good write") + elif loc > len(yaml_bytes) : + diff = loc - len(yaml_bytes) + # pad out YAML with spaces to ensure the entire YAML portion is overwritten + whitespace = ' ' * diff + bwrite = yaml_bytes + bytes(whitespace,'utf-8') + with open(fullasdf,"r+b") as fd : + fd.write(bwrite) + else : + # TODO - add functionality to detect the size of the ASDF file. If it's + # smaller than a specific size rewrire the whole file. If it's + # larger than a specific size tell the user to see if he wants a + # rewrite. + print(f"\n\nYAML text ({len(yaml):,} bytes) in\n '{fullpath}'") + print(f"is larger than available space ({loc} bytes) in") + print(f" {fullasdf}\n\n") + asdf_size = os.path.getsize(fullasdf) + if asdf_size < SMALL_FILE_SIZE : + print(f"asdf_size = {asdf_size:,} and is less than {SMALL_FILE_SIZE:,} bytes") + else : + print(f"asdf_size = {asdf_size:,} and is greater than {SMALL_FILE_SIZE:,} bytes") + print("\n") + + + +def edit ( args ) : + """ + Implode a given ASDF file, which may reference external data, back + into a single ASDF file. + + Parameters + ---------- + args : The command line arguments. + """ + if args.edit : + return edit_func(args.fname) + elif args.save : + return save_func(args.fname) + else : + return print("Invalid arguments") + + + + + + + + + From 420d18d69641f529b8c4e305210f484ca165cb08 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Mon, 31 Aug 2020 09:14:57 -0400 Subject: [PATCH 02/47] Updating the edit.py file. --- asdf/commands/edit.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index 759f40e77..cb1aab734 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -14,7 +14,7 @@ __all__ = ['edit'] -class ToYaml(Command): +class Edit(Command): @classmethod def setup_arguments(cls, subparsers): desc_string = "Allows for easy editing of the YAML in an ASDF file. " \ @@ -23,14 +23,17 @@ def setup_arguments(cls, subparsers): "editing. For save mode, the edited text file is written" \ "to its ASDF file." + # Set up the parser parser = subparsers.add_parser( str("edit"), help="Edit YAML portion of an ASDF file.", description=desc_string) + # Need an input file parser.add_argument( '--infile', '-f', type=str, required=True, dest='fname', help="Input file") + # The edit is either being performed or saved group = parser.add_mutually_exclusive_group(required=True) group.add_argument( '-s',action='store_true',dest='save', @@ -97,6 +100,8 @@ def get_yaml ( fname, return_yaml=False ) : ''' chunk_size = 1024 # Arbitrary chunk to read + + # TODO - this needs to change to look for '\r\n' and not just '\n'. dstart = b'\x0a\x2e\x2e\x2e\x0a' # The binary data delimiter - '\n...\n' dlen = len(dstart) # Length of binary delimiter @@ -160,7 +165,7 @@ def edit_func ( fname ) : # TODO - validate an ASDF file fullpath = os.path.abspath(fname) if not is_asdf_file(fullpath) : - print(f"To use the '-e' option, as ASDF file must be inputted.") + print("To use the '-e' option, as ASDF file must be inputted.") print(f"The file is not an ASDF: \n'{fullpath}'\n") return False @@ -222,7 +227,7 @@ def save_func ( fname ) : fullpath = os.path.abspath(fname) fullasdf = get_asdf_name(fullpath) if not is_yaml_file(fullpath) : # Validate YAML file - print(f"To use the '-s' option, as YAML fle must be inputted.") + print("To use the '-s' option, as YAML fle must be inputted.") print(f"The file is not a YAML: \n'{fullpath}'\n") return False From b846d5f4d8f1e2539e5d9e7c7cb5cc7bd7ce7fac Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Tue, 1 Sep 2020 12:48:42 -0400 Subject: [PATCH 03/47] Pushing an error trying to expose a function in asdf.py to be used in edit.py --- asdf/__init__.py | 1 + asdf/asdf.py | 23 +++++++++++++++++++++++ asdf/commands/__init__.py | 15 +++++++++++---- asdf/commands/edit.py | 35 +++++++++++++++++++++++++++++++++++ 4 files changed, 70 insertions(+), 4 deletions(-) diff --git a/asdf/__init__.py b/asdf/__init__.py index 3ea5bf0b9..3ece33c1e 100644 --- a/asdf/__init__.py +++ b/asdf/__init__.py @@ -13,6 +13,7 @@ 'AsdfFile', 'CustomType', 'AsdfExtension', 'Stream', 'open', 'test', 'commands', 'IntegerType', 'ExternalArrayReference', 'info', '__version__', '__githash__', 'ValidationError', 'get_config', 'config_context', + 'parse_asdf_header_line', ] diff --git a/asdf/asdf.py b/asdf/asdf.py index 50d160bc8..076a6b1f3 100644 --- a/asdf/asdf.py +++ b/asdf/asdf.py @@ -35,6 +35,17 @@ from .tags.core import AsdfObject, Software, HistoryEntry, ExtensionMetadata +def parse_asdf_header_line ( line ) : + parts = line.split() + if len(parts) != 2 or parts[0] != constants.ASDF_MAGIC: + raise ValueError("Does not appear to be a ASDF file.") + + try: + version = versioning.AsdfVersion(parts[1].decode('ascii')) + except ValueError: + raise ValueError("Unparseable version in ASDF file: {0}".format(parts[1])) + + return version def get_asdf_library_info(): """ @@ -733,6 +744,8 @@ def _parse_header_line(cls, line): """ Parses the header line in a ASDF file to obtain the ASDF version. """ + return parse_asdf_header_line(line) + ''' parts = line.split() if len(parts) != 2 or parts[0] != constants.ASDF_MAGIC: raise ValueError("Does not appear to be a ASDF file.") @@ -744,6 +757,7 @@ def _parse_header_line(cls, line): "Unparseable version in ASDF file: {0}".format(parts[1])) return version + ''' @classmethod def _parse_comment_section(cls, content): @@ -786,11 +800,13 @@ def _open_asdf(cls, self, fd, uri=None, mode='r', **kwargs): """Attempt to populate AsdfFile data from file-like object""" + # Function 1 Extensions if strict_extension_check and ignore_missing_extensions: raise ValueError( "'strict_extension_check' and 'ignore_missing_extensions' are " "incompatible options") + # Function 2 Validate if "validate_on_read" in kwargs: warnings.warn( "The 'validate_on_read' argument is deprecated, set " @@ -811,16 +827,20 @@ def _open_asdf(cls, self, fd, uri=None, mode='r', else: legacy_fill_schema_defaults = get_config().legacy_fill_schema_defaults + # Function 3 Open self._mode = mode fd = generic_io.get_file(fd, mode=self._mode, uri=uri) self._fd = fd + + # Function 4 Validate ASDF # The filename is currently only used for tracing warning information self._fname = self._fd._uri if self._fd._uri else '' header_line = fd.read_until(b'\r?\n', 2, "newline", include=True) self._file_format_version = cls._parse_header_line(header_line) self.version = self._file_format_version + # Function 5 Read and validate YAML comment_section = fd.read_until( b'(%YAML)|(' + constants.BLOCK_MAGIC + b')', 5, "start of content", include=False, exception=False) @@ -867,13 +887,16 @@ def _open_asdf(cls, self, fd, uri=None, mode='r', # to select the correct tag for us. tree = yamlutil.custom_tree_to_tagged_tree(AsdfObject(), self) + # Function 6 Process binary blocks if has_blocks: self._blocks.read_internal_blocks( fd, past_magic=True, validate_checksums=validate_checksums) self._blocks.read_block_index(fd, self) + # Function 7 References tree = reference.find_references(tree, self) + # Function 8 Schemas if self.version <= versioning.FILL_DEFAULTS_MAX_VERSION and legacy_fill_schema_defaults: schema.fill_defaults(tree, self, reading=True) diff --git a/asdf/commands/__init__.py b/asdf/commands/__init__.py index aa70f659b..3a084d63e 100644 --- a/asdf/commands/__init__.py +++ b/asdf/commands/__init__.py @@ -9,10 +9,17 @@ from .info import info from .edit import edit - -__all__ = ['edit', 'implode', 'explode', 'to_yaml', 'defragment', 'diff', 'list_tags', - 'find_extensions', 'info'] - +__all__ = [ + 'defragment', + 'diff', + 'edit', + 'explode', + 'find_extensions', + 'implode', + 'info' + 'list_tags', + 'to_yaml', +] # Extracting ASDF-in-FITS files requires Astropy if importlib.util.find_spec('astropy'): diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index cb1aab734..429a8a35c 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -7,6 +7,7 @@ import sys import asdf +from .. import generic_io from .main import Command from .. import AsdfFile @@ -151,12 +152,46 @@ def get_asdf_name ( fname ) : base, ext = os.path.splitext(fname) return base + '.asdf' +def validate_asdf_path ( fname ) : + if not os.path.exists(fname) : + print(f"Error: No file '{fname}' exists.") + return False + + base, ext = os.path.splitext(fname) + if ext!='.asdf' : + return False + return True + +def validate_asdf_file ( fd ) : + header_line = fd.read_until(b'\r?\n', 2, "newline", include=True) + print(f"header_line = {header_line}") + #self._file_format_version = cls._parse_header_line(header_line) + file_format_version = asdf.parse_asdf_header_line(header_line) + + def edit_func ( fname ) : """ Creates a YAML file from an ASDF file. The YAML file will contain only the YAML from the ASDF file. The YAML text will be written to a YAML text file in the same, so from 'example.asdf' the file 'example.yaml' will be created. + Parameters + ---------- + fname : The input file name. + """ + if not validate_asdf_path(fname) : + return False + + fullpath = os.path.abspath(fname) + fd = generic_io.get_file(fullpath, mode="r") + validate_asdf_file(fd) + +def edit_func_old ( fname ) : + """ + Creates a YAML file from an ASDF file. The YAML file will contain only the + YAML from the ASDF file. The YAML text will be written to a YAML text file + in the same, so from 'example.asdf' the file 'example.yaml' will be created. + Parameters ---------- fname : The input file name. From 8b2c9be8dfee02e90a8f28dba65e52660f1873fc Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Tue, 1 Sep 2020 13:42:49 -0400 Subject: [PATCH 04/47] Corrected importation of internal function error in edit.py. --- asdf/__init__.py | 1 - asdf/asdf.py | 11 +++++------ asdf/commands/edit.py | 11 ++++++----- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/asdf/__init__.py b/asdf/__init__.py index 3ece33c1e..3ea5bf0b9 100644 --- a/asdf/__init__.py +++ b/asdf/__init__.py @@ -13,7 +13,6 @@ 'AsdfFile', 'CustomType', 'AsdfExtension', 'Stream', 'open', 'test', 'commands', 'IntegerType', 'ExternalArrayReference', 'info', '__version__', '__githash__', 'ValidationError', 'get_config', 'config_context', - 'parse_asdf_header_line', ] diff --git a/asdf/asdf.py b/asdf/asdf.py index 076a6b1f3..fe6941648 100644 --- a/asdf/asdf.py +++ b/asdf/asdf.py @@ -35,7 +35,10 @@ from .tags.core import AsdfObject, Software, HistoryEntry, ExtensionMetadata -def parse_asdf_header_line ( line ) : +def __parse_asdf_header_line ( line ) : + """ Parses the header line (first line) of an ASDF file and verifies + it is properly formatted. + """ parts = line.split() if len(parts) != 2 or parts[0] != constants.ASDF_MAGIC: raise ValueError("Does not appear to be a ASDF file.") @@ -744,7 +747,7 @@ def _parse_header_line(cls, line): """ Parses the header line in a ASDF file to obtain the ASDF version. """ - return parse_asdf_header_line(line) + return __parse_asdf_header_line(line) ''' parts = line.split() if len(parts) != 2 or parts[0] != constants.ASDF_MAGIC: @@ -800,13 +803,11 @@ def _open_asdf(cls, self, fd, uri=None, mode='r', **kwargs): """Attempt to populate AsdfFile data from file-like object""" - # Function 1 Extensions if strict_extension_check and ignore_missing_extensions: raise ValueError( "'strict_extension_check' and 'ignore_missing_extensions' are " "incompatible options") - # Function 2 Validate if "validate_on_read" in kwargs: warnings.warn( "The 'validate_on_read' argument is deprecated, set " @@ -827,13 +828,11 @@ def _open_asdf(cls, self, fd, uri=None, mode='r', else: legacy_fill_schema_defaults = get_config().legacy_fill_schema_defaults - # Function 3 Open self._mode = mode fd = generic_io.get_file(fd, mode=self._mode, uri=uri) self._fd = fd - # Function 4 Validate ASDF # The filename is currently only used for tracing warning information self._fname = self._fd._uri if self._fd._uri else '' header_line = fd.read_until(b'\r?\n', 2, "newline", include=True) diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index 429a8a35c..40db41bc7 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -6,7 +6,7 @@ import os import sys -import asdf +from asdf.asdf import __parse_asdf_header_line from .. import generic_io from .main import Command from .. import AsdfFile @@ -14,6 +14,7 @@ __all__ = ['edit'] +asdf_format_version = None class Edit(Command): @classmethod @@ -163,11 +164,11 @@ def validate_asdf_path ( fname ) : return True def validate_asdf_file ( fd ) : + global asdf_format_version header_line = fd.read_until(b'\r?\n', 2, "newline", include=True) - print(f"header_line = {header_line}") - #self._file_format_version = cls._parse_header_line(header_line) - file_format_version = asdf.parse_asdf_header_line(header_line) - + asdf_format_version = __parse_asdf_header_line(header_line) + # Validate ASDF format version + return True def edit_func ( fname ) : """ From 38c622367cd0eebc97cfc7498c613596e0ee759a Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Tue, 1 Sep 2020 13:47:02 -0400 Subject: [PATCH 05/47] Correcting name formatting of internal use only functions. --- asdf/asdf.py | 8 ++++++-- asdf/commands/edit.py | 4 ++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/asdf/asdf.py b/asdf/asdf.py index fe6941648..bb2d0b5f3 100644 --- a/asdf/asdf.py +++ b/asdf/asdf.py @@ -35,7 +35,7 @@ from .tags.core import AsdfObject, Software, HistoryEntry, ExtensionMetadata -def __parse_asdf_header_line ( line ) : +def _parse_asdf_header_line ( line ) : """ Parses the header line (first line) of an ASDF file and verifies it is properly formatted. """ @@ -747,7 +747,7 @@ def _parse_header_line(cls, line): """ Parses the header line in a ASDF file to obtain the ASDF version. """ - return __parse_asdf_header_line(line) + return _parse_asdf_header_line(line) ''' parts = line.split() if len(parts) != 2 or parts[0] != constants.ASDF_MAGIC: @@ -803,11 +803,13 @@ def _open_asdf(cls, self, fd, uri=None, mode='r', **kwargs): """Attempt to populate AsdfFile data from file-like object""" + # Function 1 Extensions if strict_extension_check and ignore_missing_extensions: raise ValueError( "'strict_extension_check' and 'ignore_missing_extensions' are " "incompatible options") + # Function 2 Validate if "validate_on_read" in kwargs: warnings.warn( "The 'validate_on_read' argument is deprecated, set " @@ -828,11 +830,13 @@ def _open_asdf(cls, self, fd, uri=None, mode='r', else: legacy_fill_schema_defaults = get_config().legacy_fill_schema_defaults + # Function 3 Open self._mode = mode fd = generic_io.get_file(fd, mode=self._mode, uri=uri) self._fd = fd + # Function 4 Validate ASDF # The filename is currently only used for tracing warning information self._fname = self._fd._uri if self._fd._uri else '' header_line = fd.read_until(b'\r?\n', 2, "newline", include=True) diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index 40db41bc7..7c2a1fb44 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -6,7 +6,7 @@ import os import sys -from asdf.asdf import __parse_asdf_header_line +from asdf.asdf import _parse_asdf_header_line from .. import generic_io from .main import Command from .. import AsdfFile @@ -166,7 +166,7 @@ def validate_asdf_path ( fname ) : def validate_asdf_file ( fd ) : global asdf_format_version header_line = fd.read_until(b'\r?\n', 2, "newline", include=True) - asdf_format_version = __parse_asdf_header_line(header_line) + asdf_format_version = _parse_asdf_header_line(header_line) # Validate ASDF format version return True From dbd99bcdfbae8527444f551a16656b27d43cf8d3 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Tue, 1 Sep 2020 16:17:46 -0400 Subject: [PATCH 06/47] Adding utility functions by refactoring class methods. --- asdf/asdf.py | 26 ++++++++++++++++++++------ asdf/commands/edit.py | 39 +++++++++++++++++++++++++++++++++++---- 2 files changed, 55 insertions(+), 10 deletions(-) diff --git a/asdf/asdf.py b/asdf/asdf.py index bb2d0b5f3..699e431c8 100644 --- a/asdf/asdf.py +++ b/asdf/asdf.py @@ -35,6 +35,17 @@ from .tags.core import AsdfObject, Software, HistoryEntry, ExtensionMetadata +def _parse_asdf_comment_section( content ): + comments = [] + + lines = content.splitlines() + for line in lines: + if not line.startswith(b'#'): + raise ValueError("Invalid content between header and tree") + comments.append(line[1:].strip()) + + return comments + def _parse_asdf_header_line ( line ) : """ Parses the header line (first line) of an ASDF file and verifies it is properly formatted. @@ -768,6 +779,8 @@ def _parse_comment_section(cls, content): Parses the comment section, between the header line and the Tree or first block. """ + return _parse_asdf_comment_section(content) + ''' comments = [] lines = content.splitlines() @@ -777,6 +790,7 @@ def _parse_comment_section(cls, content): comments.append(line[1:].strip()) return comments + ''' @classmethod def _find_asdf_version_in_comments(cls, comments): @@ -803,13 +817,13 @@ def _open_asdf(cls, self, fd, uri=None, mode='r', **kwargs): """Attempt to populate AsdfFile data from file-like object""" - # Function 1 Extensions + # Make sure arguments aren't contradictory if strict_extension_check and ignore_missing_extensions: raise ValueError( "'strict_extension_check' and 'ignore_missing_extensions' are " "incompatible options") - # Function 2 Validate + # Set local variables if "validate_on_read" in kwargs: warnings.warn( "The 'validate_on_read' argument is deprecated, set " @@ -830,20 +844,19 @@ def _open_asdf(cls, self, fd, uri=None, mode='r', else: legacy_fill_schema_defaults = get_config().legacy_fill_schema_defaults - # Function 3 Open + # Open the file self._mode = mode fd = generic_io.get_file(fd, mode=self._mode, uri=uri) self._fd = fd - # Function 4 Validate ASDF - # The filename is currently only used for tracing warning information + # Validate the ASDF header self._fname = self._fd._uri if self._fd._uri else '' header_line = fd.read_until(b'\r?\n', 2, "newline", include=True) self._file_format_version = cls._parse_header_line(header_line) self.version = self._file_format_version - # Function 5 Read and validate YAML + # Read the optional comments line(s) comment_section = fd.read_until( b'(%YAML)|(' + constants.BLOCK_MAGIC + b')', 5, "start of content", include=False, exception=False) @@ -859,6 +872,7 @@ def _open_asdf(cls, self, fd, uri=None, mode='r', if extensions: self.extensions = extensions + # Read and validate YAML text. yaml_token = fd.read(4) has_blocks = False tree = None diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index 7c2a1fb44..37a24bb0f 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -6,6 +6,8 @@ import os import sys +import asdf.constants as constants + from asdf.asdf import _parse_asdf_header_line from .. import generic_io from .main import Command @@ -165,11 +167,37 @@ def validate_asdf_path ( fname ) : def validate_asdf_file ( fd ) : global asdf_format_version + header_line = fd.read_until(b'\r?\n', 2, "newline", include=True) asdf_format_version = _parse_asdf_header_line(header_line) # Validate ASDF format version - return True + comment_section = fd.read_until( b'(%YAML)|(' + constants.BLOCK_MAGIC + b')', + 5, + "start of content", + include=False, + exception=False) + + return header_line + comment_section +def open_and_validate_asdf ( fname ) : + """ Open and validate the ASDF file, as well as read in all the YAML + that will be outputted to a YAML file. + """ + fullpath = os.path.abspath(fname) + fd = generic_io.get_file(fullpath, mode="r") + + header_and_comment = validate_asdf_file(fd) + ret_string = header_and_comment + + print(f"ret_string = {ret_string}") + sys.exit(1) + + return ret_string + +def open_and_validate_yaml ( fname ) : + ret_string = '' + return ret_string + def edit_func ( fname ) : """ Creates a YAML file from an ASDF file. The YAML file will contain only the @@ -183,9 +211,12 @@ def edit_func ( fname ) : if not validate_asdf_path(fname) : return False - fullpath = os.path.abspath(fname) - fd = generic_io.get_file(fullpath, mode="r") - validate_asdf_file(fd) + # 1. Validate input file is an ASDF file. + yaml_text = open_and_validate_asdf(fname) + + # 2. Read and validate the YAML of an ASDF file. + # 3. Open a YAML file for the ASDF YAML. + # 4. Write the YAML for the original ASDF file. def edit_func_old ( fname ) : """ From 5a9cc7b22565b5cc1d06669b99a8b6a1cec9f14e Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Tue, 1 Sep 2020 17:51:02 -0400 Subject: [PATCH 07/47] Part of the way through asdf.py refactor for code reuse in edit.py. --- asdf/asdf.py | 17 +++++++++++++++++ asdf/commands/edit.py | 32 +++++++++++++++++++++++++++----- 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/asdf/asdf.py b/asdf/asdf.py index 699e431c8..6f3f62939 100644 --- a/asdf/asdf.py +++ b/asdf/asdf.py @@ -35,6 +35,19 @@ from .tags.core import AsdfObject, Software, HistoryEntry, ExtensionMetadata +def _get_asdf_version_in_comments( comments ): + for comment in comments: + parts = comment.split() + if len(parts) == 2 and parts[0] == constants.ASDF_STANDARD_COMMENT: + try: + version = versioning.AsdfVersion(parts[1].decode('ascii')) + except ValueError: + pass + else: + return version + + return None + def _parse_asdf_comment_section( content ): comments = [] @@ -794,6 +807,8 @@ def _parse_comment_section(cls, content): @classmethod def _find_asdf_version_in_comments(cls, comments): + return _get_asdf_version_in_comments(comments) + ''' for comment in comments: parts = comment.split() if len(parts) == 2 and parts[0] == constants.ASDF_STANDARD_COMMENT: @@ -805,6 +820,7 @@ def _find_asdf_version_in_comments(cls, comments): return version return None + ''' @classmethod def _open_asdf(cls, self, fd, uri=None, mode='r', @@ -873,6 +889,7 @@ def _open_asdf(cls, self, fd, uri=None, mode='r', self.extensions = extensions # Read and validate YAML text. + # It's possible there is no YAML, so the next token could be BLOCK_MAGIC yaml_token = fd.read(4) has_blocks = False tree = None diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index 37a24bb0f..1994d583b 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -9,7 +9,10 @@ import asdf.constants as constants from asdf.asdf import _parse_asdf_header_line +from asdf.asdf import _parse_asdf_comment_section +from asdf.asdf import _get_asdf_version_in_comments from .. import generic_io +from .. import yamlutil from .main import Command from .. import AsdfFile @@ -17,6 +20,7 @@ __all__ = ['edit'] asdf_format_version = None +asdf_standard_version = None class Edit(Command): @classmethod @@ -167,6 +171,7 @@ def validate_asdf_path ( fname ) : def validate_asdf_file ( fd ) : global asdf_format_version + global asdf_standard_version header_line = fd.read_until(b'\r?\n', 2, "newline", include=True) asdf_format_version = _parse_asdf_header_line(header_line) @@ -176,6 +181,8 @@ def validate_asdf_file ( fd ) : "start of content", include=False, exception=False) + comments = _parse_asdf_comment_section(comment_section) + asdf_standard_version = _get_asdf_version_in_comments(comments) return header_line + comment_section @@ -189,12 +196,25 @@ def open_and_validate_asdf ( fname ) : header_and_comment = validate_asdf_file(fd) ret_string = header_and_comment - print(f"ret_string = {ret_string}") + return fd, ret_string + +def read_and_validate_yaml ( fd, fname ) : + YAML_TOKEN = b'%YAML' + token = fd.read(len(YAML_TOKEN)) + if token != YAML_TOKEN : + print(f"Error: No YAML in '{fname}'") + sys.exit(0) + + reader = fd.reader_until(constants.YAML_END_MARKER_REGEX, + 7, + 'End of YAML marker', + include=True, + initial_content=token) + yaml_content = reader.read() + tree = yamlutil.load_tree(reader) + print(f"tree = \n{tree}\n") # Why is this None? sys.exit(1) - return ret_string - -def open_and_validate_yaml ( fname ) : ret_string = '' return ret_string @@ -212,9 +232,11 @@ def edit_func ( fname ) : return False # 1. Validate input file is an ASDF file. - yaml_text = open_and_validate_asdf(fname) + fd, asdf_text = open_and_validate_asdf(fname) # 2. Read and validate the YAML of an ASDF file. + yaml_text = read_and_validate_yaml(fd,fname) + # 3. Open a YAML file for the ASDF YAML. # 4. Write the YAML for the original ASDF file. From 49d07e0775f3b5052e9f9097f9fd460cfd486831 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Wed, 2 Sep 2020 14:13:09 -0400 Subject: [PATCH 08/47] Completed '-e' option for the asdftool 'edit' subcommand. --- asdf/asdf.py | 52 ++----- asdf/commands/edit.py | 334 +++++++++++++++++++----------------------- 2 files changed, 165 insertions(+), 221 deletions(-) diff --git a/asdf/asdf.py b/asdf/asdf.py index 6f3f62939..d9db4b46a 100644 --- a/asdf/asdf.py +++ b/asdf/asdf.py @@ -6,6 +6,8 @@ import warnings from pkg_resources import parse_version +import ipdb + import numpy as np from jsonschema import ValidationError @@ -36,6 +38,8 @@ from .tags.core import AsdfObject, Software, HistoryEntry, ExtensionMetadata def _get_asdf_version_in_comments( comments ): + """ From the initial comments line in an ASDF file, capture the ASDF version. + """ for comment in comments: parts = comment.split() if len(parts) == 2 and parts[0] == constants.ASDF_STANDARD_COMMENT: @@ -49,6 +53,9 @@ def _get_asdf_version_in_comments( comments ): return None def _parse_asdf_comment_section( content ): + """ Parses the comment section, between the header line and the + Tree or first block. + """ comments = [] lines = content.splitlines() @@ -772,19 +779,6 @@ def _parse_header_line(cls, line): Parses the header line in a ASDF file to obtain the ASDF version. """ return _parse_asdf_header_line(line) - ''' - parts = line.split() - if len(parts) != 2 or parts[0] != constants.ASDF_MAGIC: - raise ValueError("Does not appear to be a ASDF file.") - - try: - version = versioning.AsdfVersion(parts[1].decode('ascii')) - except ValueError: - raise ValueError( - "Unparseable version in ASDF file: {0}".format(parts[1])) - - return version - ''' @classmethod def _parse_comment_section(cls, content): @@ -793,34 +787,13 @@ def _parse_comment_section(cls, content): Tree or first block. """ return _parse_asdf_comment_section(content) - ''' - comments = [] - - lines = content.splitlines() - for line in lines: - if not line.startswith(b'#'): - raise ValueError("Invalid content between header and tree") - comments.append(line[1:].strip()) - - return comments - ''' @classmethod def _find_asdf_version_in_comments(cls, comments): + """ From the initial comments line in an ASDF file, capture the ASDF + version. + """ return _get_asdf_version_in_comments(comments) - ''' - for comment in comments: - parts = comment.split() - if len(parts) == 2 and parts[0] == constants.ASDF_STANDARD_COMMENT: - try: - version = versioning.AsdfVersion(parts[1].decode('ascii')) - except ValueError: - pass - else: - return version - - return None - ''' @classmethod def _open_asdf(cls, self, fd, uri=None, mode='r', @@ -840,6 +813,8 @@ def _open_asdf(cls, self, fd, uri=None, mode='r', "incompatible options") # Set local variables + # TODO From here to self._mode = mode, can put in a function + # TODO validate_on_read, legacy_fill_schema_defaults = validate_and_schema(kwargs) if "validate_on_read" in kwargs: warnings.warn( "The 'validate_on_read' argument is deprecated, set " @@ -914,6 +889,7 @@ def _open_asdf(cls, self, fd, uri=None, mode='r', elif yaml_token != b'': raise IOError("ASDF file appears to contain garbage after header.") + # The variable tree gets overwritten mulitple times. Why? if tree is None: # At this point the tree should be tagged, but we want it to be # tagged with the core/asdf version appropriate to this file's @@ -928,6 +904,7 @@ def _open_asdf(cls, self, fd, uri=None, mode='r', self._blocks.read_block_index(fd, self) # Function 7 References + ipdb.set_trace() tree = reference.find_references(tree, self) # Function 8 Schemas @@ -936,6 +913,7 @@ def _open_asdf(cls, self, fd, uri=None, mode='r', if validate_on_read: try: + # TODO Validation will take some work to separate from the class. self._validate(tree, reading=True) except ValidationError: self.close() diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index 1994d583b..fdcde1457 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -1,8 +1,9 @@ """ -Contains commands for dealing with exploded and imploded forms. +Contains commands for lightweight text editing of an ASDF file. +Future work: Make this interactive editing. """ - +import io import os import sys @@ -11,20 +12,26 @@ from asdf.asdf import _parse_asdf_header_line from asdf.asdf import _parse_asdf_comment_section from asdf.asdf import _get_asdf_version_in_comments + +from .. import AsdfFile from .. import generic_io +from .. import reference +from .. import schema from .. import yamlutil -from .main import Command -from .. import AsdfFile +from .main import Command __all__ = ['edit'] -asdf_format_version = None -asdf_standard_version = None +#asdf_format_version = None +#asdf_standard_version = None + class Edit(Command): @classmethod def setup_arguments(cls, subparsers): + """ Set up a command line argument parser for the edit subcommand. + """ desc_string = "Allows for easy editing of the YAML in an ASDF file. " \ "For edit mode, the YAML portion of an ASDF file is" \ "separated from the ASDF into a text file for easy" \ @@ -39,7 +46,12 @@ def setup_arguments(cls, subparsers): # Need an input file parser.add_argument( '--infile', '-f', type=str, required=True, dest='fname', - help="Input file") + help="Input file (ASDF for -e option, YAML for -s option") + + # Need an output file + parser.add_argument( + '--outfile', '-o', type=str, required=True, dest='oname', + help="Output file (YAML for -e option, ASDF for -s option") # The edit is either being performed or saved group = parser.add_mutually_exclusive_group(required=True) @@ -56,8 +68,11 @@ def setup_arguments(cls, subparsers): @classmethod def run(cls, args): + """ Execute the edit subcommand. + """ return edit(args) + def is_yaml_file ( fname ) : ''' Determines if a file is a YAML file based only on the file extension. @@ -72,6 +87,7 @@ def is_yaml_file ( fname ) : return False return True + def is_asdf_file ( fname ) : ''' Determines if a file is ASDF based on file extension and the first @@ -94,131 +110,142 @@ def is_asdf_file ( fname ) : return True -def get_yaml ( fname, return_yaml=False ) : - ''' - Reads all bytes from an ASDF file up to the '\n...\n' delimiter, which - separates the YAML text from the binary data. The location of the - first byte of the delimiter is alwyas returned. When requested, the - YAML text is also, returned. + +def is_validate_path_and_ext ( fname, wanted_ext=None ) : + """ Validates the path exists and the extension is one wanted. Parameters ---------- fname : The input file name. - return_yaml : the boolean flag to return the YAML text - ''' + wanted_ext : List of extensions to check. + """ + if not os.path.exists(fname) : + print(f"Error: No file '{fname}' exists.") + return False - chunk_size = 1024 # Arbitrary chunk to read + # Simply validates the path existence + if wanted_ext is None: + return True + + # Make sure the extension is one desired. + base, ext = os.path.splitext(fname) + if ext not in wanted_ext: + return False - # TODO - this needs to change to look for '\r\n' and not just '\n'. - dstart = b'\x0a\x2e\x2e\x2e\x0a' # The binary data delimiter - '\n...\n' - dlen = len(dstart) # Length of binary delimiter + return True - with open(fname,"r+b") as fd : - dfound = False # No data found, yet - fbytes = fd.read(chunk_size) - chunk_cnt = 0 - chunk_start = 0 - chunk_end = chunk_start + chunk_size - dlen - while not dfound : - for k in range(chunk_start,chunk_end) : - if dstart==fbytes[k:k+dlen] : # Check for the data delimiter - dfound = True - if return_yaml : - return k, fbytes[:k].decode('utf-8') - else : - return k, '' - chunk_cnt = chunk_cnt + 1 # Count the number of chunks read - cbytes = fd.read(chunk_size) - if cbytes is None : - return -1, '' # EOF without finding delimiter - fbytes += cbytes # Save all bytes read - chunk_start = chunk_cnt * chunk_size - dlen - chunk_end = chunk_start + chunk_size - - return -1, '' # EOF without finding delimiter - -def get_yaml_name ( fname ) : - ''' - Using the base ASDF name, create a corresponding YAML file name. + +def is_validate_asdf_path ( fname ) : + """ Validates fname path exists and has extension '.asdf'. Parameters ---------- fname : The input file name. - ''' - base, ext = os.path.splitext(fname) - return base + '.yaml' + """ + ext = ['.asdf'] + if is_validate_path_and_ext(fname,ext) : + return True + print(f"Error: '{fname}' should have extension '{ext[0]}'") + return False -def get_asdf_name ( fname ) : - ''' - Using the base YAML name, create a corresponding ASDF file name. + +def is_validate_yaml_path ( fname ) : + """ Validates fname path exists and has extension '.yaml'. Parameters ---------- fname : The input file name. - ''' - base, ext = os.path.splitext(fname) - return base + '.asdf' - -def validate_asdf_path ( fname ) : - if not os.path.exists(fname) : - print(f"Error: No file '{fname}' exists.") - return False + """ + ext = ['.yaml'] + if is_validate_path_and_ext(fname,ext) : + return True + print(f"Error: '{fname}' should have extension '{ext[0]}'") + return False - base, ext = os.path.splitext(fname) - if ext!='.asdf' : - return False - return True def validate_asdf_file ( fd ) : - global asdf_format_version - global asdf_standard_version + """ Makes sure the header line is the expected one, as well + as getting the optional comment line. + + Parameters + ---------- + fd : GenericFile + """ + #global asdf_format_version + #global asdf_standard_version + ASDF_ID = b'#ASDF' header_line = fd.read_until(b'\r?\n', 2, "newline", include=True) - asdf_format_version = _parse_asdf_header_line(header_line) - # Validate ASDF format version + if ASDF_ID!=header_line[:len(ASDF_ID)] : + # Raise exception + print("Invalid ASDF ID") + sys.exit(1) + + #asdf_format_version = _parse_asdf_header_line(header_line) + # Maybe validate ASDF format version comment_section = fd.read_until( b'(%YAML)|(' + constants.BLOCK_MAGIC + b')', 5, "start of content", include=False, exception=False) - comments = _parse_asdf_comment_section(comment_section) - asdf_standard_version = _get_asdf_version_in_comments(comments) + # Maybe do the following for more validate. But maybe not. + #comments = _parse_asdf_comment_section(comment_section) + #asdf_standard_version = _get_asdf_version_in_comments(comments) return header_line + comment_section def open_and_validate_asdf ( fname ) : """ Open and validate the ASDF file, as well as read in all the YAML that will be outputted to a YAML file. + + Parameters + ---------- + fname : The input file name. """ fullpath = os.path.abspath(fname) fd = generic_io.get_file(fullpath, mode="r") + # Read the ASDF header and optional comments section header_and_comment = validate_asdf_file(fd) - ret_string = header_and_comment - return fd, ret_string + return fd, header_and_comment # Return GenericFile and ASDF header bytes. def read_and_validate_yaml ( fd, fname ) : + """ Get the YAML text from an ASDF formatted file. + + Parameters + ---------- + fname : The input file name. + fd : GenericFile for fname. + """ YAML_TOKEN = b'%YAML' token = fd.read(len(YAML_TOKEN)) if token != YAML_TOKEN : + # Raise exception print(f"Error: No YAML in '{fname}'") sys.exit(0) + # Get YAML reader and content reader = fd.reader_until(constants.YAML_END_MARKER_REGEX, 7, 'End of YAML marker', include=True, initial_content=token) yaml_content = reader.read() - tree = yamlutil.load_tree(reader) - print(f"tree = \n{tree}\n") # Why is this None? - sys.exit(1) - ret_string = '' - return ret_string + # Create a YAML tree to validate + # The YAML text must be converted to a stream. + tree = yamlutil.load_tree(io.BytesIO(yaml_content)) + if tree is None: + # Raise exception. + print("Error: 'yamlutil.load_tree' failed to return a tree.") + sys.exist(1) + + schema.validate(tree, None) # Failure raises and exception. + + return yaml_content -def edit_func ( fname ) : +def edit_func ( fname, oname ) : """ Creates a YAML file from an ASDF file. The YAML file will contain only the YAML from the ASDF file. The YAML text will be written to a YAML text file @@ -226,9 +253,10 @@ def edit_func ( fname ) : Parameters ---------- - fname : The input file name. + fname : The input ASDF file name. + oname : The output YAML file name. """ - if not validate_asdf_path(fname) : + if not is_validate_asdf_path(fname) : return False # 1. Validate input file is an ASDF file. @@ -238,63 +266,35 @@ def edit_func ( fname ) : yaml_text = read_and_validate_yaml(fd,fname) # 3. Open a YAML file for the ASDF YAML. - # 4. Write the YAML for the original ASDF file. - -def edit_func_old ( fname ) : - """ - Creates a YAML file from an ASDF file. The YAML file will contain only the - YAML from the ASDF file. The YAML text will be written to a YAML text file - in the same, so from 'example.asdf' the file 'example.yaml' will be created. - - Parameters - ---------- - fname : The input file name. - """ - - # TODO - validate an ASDF file - fullpath = os.path.abspath(fname) - if not is_asdf_file(fullpath) : - print("To use the '-e' option, as ASDF file must be inputted.") - print(f"The file is not an ASDF: \n'{fullpath}'\n") - return False - - # Get YAML from ASDF and its end location in the YAML file - loc, yaml_string = get_yaml(fullpath,return_yaml=True) - if -1==loc : - print(f"Could not find the YAML of '{fullpath}'",file=sys.stderr) + if not is_yaml_file(oname) : + # Raise an exception + print(f"Error: '{oname}' must have '.yaml' extension.") sys.exit(1) - # Open YAML file - fullyaml = get_yaml_name(fullpath) - - # Write all YAML from ASDF to YAML - with open(fullyaml,"w") as fd : - fd.write(f"{yaml_string}") - # Tell user - delim = '*' * 65 - print(f"{delim}") - print(f"A YAML text file has been created at:\n'{fullyaml}'\n") - print("Edit this file in any text editor, then run the following command") - print("to save YAML edits to the ASDF file:\n") - print(f"'asdftool edit -s --infile {fullyaml}") - print(f"\n{delim}") - - -def get_yaml_with_no_trailing_whitespace ( yamlpath ) : - ''' - Get the YAML text from an ASDF file and remove any trailing whitespace. - - Parameters - ---------- - fname : The input YAML file. - ''' - with open(yamlpath,"r") as fd : - yaml = fd.read() - return yaml.rstrip() - - return '' + # 4. Write the YAML for the original ASDF file. + with open(oname,"wb") as ofd : + ofd.write(asdf_text) + ofd.write(yaml_text) -def save_func ( fname ) : + # 5. Output message to user. + delim = '*' * 70 + print(f"\n{delim}") + print("ASDF formatting and YAML schema validated.") + print(f"The text portion of '{fname}' is written to:") + print(f" '{oname}'") + print(f"The file '{oname}' can be edited using your favorite text editor.") + print("The edited text can then be saved to the ASDF file of your choice") + print("using 'asdftool edit -s -f -o .") + print('-' * 70) + print("Note: This is meant to be a lightweight text editing tool of") + print(" ASDF .If the edited text is larger than the YAML portion") + print(" of the ASDF file to be written to, the edits may not be") + print(" able to saved.") + print(f"{delim}\n") + + return + +def save_func ( fname, oname ) : """ Checks to makes sure a corresponding ASDF file exists. This is done by seeing if a file of the same name with '.asdf' as an extension exists. @@ -308,65 +308,31 @@ def save_func ( fname ) : Parameters ---------- fname : The input YAML file. + oname : The output ASDF file name. """ _1G = 1000**3 # 1 gig C = 1 # constant multiple of gig SMALL_FILE_SIZE = C * _1G - fullpath = os.path.abspath(fname) - fullasdf = get_asdf_name(fullpath) - if not is_yaml_file(fullpath) : # Validate YAML file - print("To use the '-s' option, as YAML fle must be inputted.") - print(f"The file is not a YAML: \n'{fullpath}'\n") + if not is_validate_yaml_path(fname): return False - # Check to see if a corresponding ASDF file exists - if not os.path.exists(fullasdf) : - print(f"Error: ASDF file does not exist '{fullasdf}'",file=sys.stderr) + if not is_validate_asdf_path(oname): + return False - # Find end of YAML in ASDF - loc, yaml_string = get_yaml(fullasdf,return_yaml=False) - if -1==loc : - print(f"Could not find the YAML of '{fullasdf}'",file=sys.stderr) - sys.exit(1) + # 1. Validate input file is an ASDF file. + fd, asdf_text = open_and_validate_asdf(fname) - # Read YAML - yaml = get_yaml_with_no_trailing_whitespace(fullpath) - yaml_bytes = bytes(yaml,'utf-8') - - # TODO - validate YAML format and schema (maybe do this else where) - - # If larger than YAML in ASDF - # TODO - Investigate python module fileinput - #print(f"loc = {loc}, len(yaml) = {len(yaml)}") - if loc == len(yaml_bytes) : - #with open(fullasdf,"w") as fd : - with open(fullasdf,"r+b") as fd : - fd.write(yaml_bytes) - print("Good write") - elif loc > len(yaml_bytes) : - diff = loc - len(yaml_bytes) - # pad out YAML with spaces to ensure the entire YAML portion is overwritten - whitespace = ' ' * diff - bwrite = yaml_bytes + bytes(whitespace,'utf-8') - with open(fullasdf,"r+b") as fd : - fd.write(bwrite) - else : - # TODO - add functionality to detect the size of the ASDF file. If it's - # smaller than a specific size rewrire the whole file. If it's - # larger than a specific size tell the user to see if he wants a - # rewrite. - print(f"\n\nYAML text ({len(yaml):,} bytes) in\n '{fullpath}'") - print(f"is larger than available space ({loc} bytes) in") - print(f" {fullasdf}\n\n") - asdf_size = os.path.getsize(fullasdf) - if asdf_size < SMALL_FILE_SIZE : - print(f"asdf_size = {asdf_size:,} and is less than {SMALL_FILE_SIZE:,} bytes") - else : - print(f"asdf_size = {asdf_size:,} and is greater than {SMALL_FILE_SIZE:,} bytes") - print("\n") + # 2. Read and validate the YAML of an ASDF file. + yaml_text = read_and_validate_yaml(fd,fname) + + edited_text = asdf_text + yaml_text + # 3. Get text from ASDF file. + # 4. Compare text sizes and maybe output. + # 5. Output message to user. + return def edit ( args ) : """ @@ -378,9 +344,9 @@ def edit ( args ) : args : The command line arguments. """ if args.edit : - return edit_func(args.fname) + return edit_func(args.fname,args.oname) elif args.save : - return save_func(args.fname) + return save_func(args.fname,args.oname) else : return print("Invalid arguments") From 1bc4ef1d7ce5083c37a471d4c5c133f4b3bf2424 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Wed, 2 Sep 2020 17:05:28 -0400 Subject: [PATCH 09/47] The edit command is almost finished. For a rewritten ASDF file, due to larger edited text, the block index at the end will need to be re-computed. Currently, if one exists, it is simply written out, but it is wrong. --- asdf/commands/edit.py | 117 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 101 insertions(+), 16 deletions(-) diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index fdcde1457..f278516ad 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -259,24 +259,24 @@ def edit_func ( fname, oname ) : if not is_validate_asdf_path(fname) : return False - # 1. Validate input file is an ASDF file. + # Validate input file is an ASDF file. fd, asdf_text = open_and_validate_asdf(fname) - # 2. Read and validate the YAML of an ASDF file. + # Read and validate the YAML of an ASDF file. yaml_text = read_and_validate_yaml(fd,fname) - # 3. Open a YAML file for the ASDF YAML. + # Open a YAML file for the ASDF YAML. if not is_yaml_file(oname) : # Raise an exception print(f"Error: '{oname}' must have '.yaml' extension.") sys.exit(1) - # 4. Write the YAML for the original ASDF file. + # Write the YAML for the original ASDF file. with open(oname,"wb") as ofd : ofd.write(asdf_text) ofd.write(yaml_text) - # 5. Output message to user. + # Output message to user. delim = '*' * 70 print(f"\n{delim}") print("ASDF formatting and YAML schema validated.") @@ -294,6 +294,61 @@ def edit_func ( fname, oname ) : return +def buffer_edited_text ( edited_text, orig_text ) : + """ There is more text in the original ASDF file than in the edited text, + so we will buffer the edited text with spaces. + """ + diff = len(orig_text) - len(edited_text) + if diff<1 : + print("Error: shouldn't be here.") + sys.exit(1) + + wdelim = b'\r\n...\r\n' + ldelim = b'\n...\n' + if edited_text[-len(wdelim):]==wdelim : + delim = wdelim + elif edited_text[-len(ldelim):]==ldelim : + delim = ldelim + else: + # Raise exception + print("Unrecognized YAML delimiter ending the YAML text.") + print(f"It should be {wdelim} or {ldelim}, but the") + print(f"last {len(wdelim)} bytes are {edited_text[-len(wdelim):]}.") + sys.exit(1) + + buffered_text = edited_text[:-len(delim)] + b'\n' + b' '*(diff-1) + delim + return buffered_text, diff-1 + + # buffered_text = edited_text[:-len(delim)] + b' '*diff + delim + #return buffered_text, diff + +def rewrite_asdf_file ( edited_text, orig_text, oname, fname ) : + tmp_oname = oname + '.tmp' + buffer_size = 10 * 1000 + buffer_text = b'\n' + b' ' * buffer_size + #print("Here") + #return + + with open(oname,"r+b") as fd : + orig_buffer = fd.read() + asdf_blocks = orig_buffer[len(orig_text):] + out_bytes = edited_text + buffer_text + asdf_blocks + + # TODO Compute new block index!!!! + + with open(tmp_oname,"w+b") as fd : + fd.write(out_bytes) + os.rename(tmp_oname,oname) + delim = '*' * 70 + print(f"\n{delim}") + print(f"The text in '{fname}' was too large to simply overwrite the") + print(f"text in '{oname}'. The file '{oname}' was rewritten to") + print(f"accommodate the larger text size. Also, {len(buffer_text):,} bytes") + print(f"as a buffer for the text in '{oname}' to allow for future edits.") + print(f"**** If a block index existed in the original ASDF,") + print(f" it is now invalidated. This needs to be fixed.") + print(f"{delim}\n") + def save_func ( fname, oname ) : """ Checks to makes sure a corresponding ASDF file exists. This is done by @@ -320,17 +375,47 @@ def save_func ( fname, oname ) : if not is_validate_asdf_path(oname): return False - # 1. Validate input file is an ASDF file. - fd, asdf_text = open_and_validate_asdf(fname) - - # 2. Read and validate the YAML of an ASDF file. - yaml_text = read_and_validate_yaml(fd,fname) - - edited_text = asdf_text + yaml_text - - # 3. Get text from ASDF file. - # 4. Compare text sizes and maybe output. - # 5. Output message to user. + # Validate input file is an ASDF formatted YAML. + ifd, iasdf_text = open_and_validate_asdf(fname) + iyaml_text = read_and_validate_yaml(ifd,fname) + ifd.close() + edited_text = iasdf_text + iyaml_text + + # Get text from ASDF file. + ofd, oasdf_text = open_and_validate_asdf(oname) + oyaml_text = read_and_validate_yaml(ofd,oname) + ofd.close() + asdf_text = oasdf_text + oyaml_text + + # Compare text sizes and maybe output. + # There are three cases: + msg_delim = '*' * 70 + if len(edited_text) == len(asdf_text) : + with open(oname,"r+b") as fd : + fd.write(edited_text) + print(f"\n{msg_delim}") + print(f"The edited text in '{fname}' was written to '{oname}'") + print(f"{msg_delim}\n") + elif len(edited_text) < len(asdf_text) : + buffered_text, diff = buffer_edited_text(edited_text,asdf_text) + with open(oname,"r+b") as fd : + fd.write(buffered_text) + print(f"\n{msg_delim}") + print(f"The edited text in '{fname}' was written to '{oname}'") + print(f"Added a {diff} buffer of spaces between the YAML text and binary blocks.") + print(f"{msg_delim}\n") + else : + if os.stat(oname).st_size <= SMALL_FILE_SIZE : + rewrite_asdf_file(edited_text,asdf_text,oname,fname) + else: + print(f"\n{msg_delim}") + print(f"Cannot write the text from '{fname}' to '{oname}'.") + print(f"There is too much edited text to write and the ASDF file") + print(f"is too large to rewrite.") + print("Another method must be used to edit '{oname}'.") + print(f"{msg_delim}\n") + + # Output message to user. return From 83eeb979ab530d998c4b0bfcb03aeb0a1de2e07a Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Fri, 4 Sep 2020 09:21:34 -0400 Subject: [PATCH 10/47] Finished the asdftool edit subcommand for edited YAML text with the same or fewer YAML characters as in the original ASDF file. --- asdf/asdf.py | 3 +++ asdf/commands/edit.py | 25 +++++++++++++++++++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/asdf/asdf.py b/asdf/asdf.py index d9db4b46a..ecc2eba45 100644 --- a/asdf/asdf.py +++ b/asdf/asdf.py @@ -805,6 +805,9 @@ def _open_asdf(cls, self, fd, uri=None, mode='r', ignore_missing_extensions=False, **kwargs): """Attempt to populate AsdfFile data from file-like object""" + # Generally, I think this function should be made smaller. There are + # many steps being taken here that can be broken down, with expressible + # function names to make clearer what this function does. # Make sure arguments aren't contradictory if strict_extension_check and ignore_missing_extensions: diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index f278516ad..f95a7c882 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -316,6 +316,7 @@ def buffer_edited_text ( edited_text, orig_text ) : print(f"last {len(wdelim)} bytes are {edited_text[-len(wdelim):]}.") sys.exit(1) + # May not be correct. If on Windows use '\r\n'. buffered_text = edited_text[:-len(delim)] + b'\n' + b' '*(diff-1) + delim return buffered_text, diff-1 @@ -323,6 +324,14 @@ def buffer_edited_text ( edited_text, orig_text ) : #return buffered_text, diff def rewrite_asdf_file ( edited_text, orig_text, oname, fname ) : + """ TODO This function implentation needs to be finished. + Rewrite an ASDF file for too large edited YAML. The edited YAML, a buffer, + the blocks will be rewritten. A block index will also be rewritten. If a + block index existed in the old file, it will have to be recomputed to + because of the larger YAML size and buffer, which changes the location of + the binary blocks. + """ + tmp_oname = oname + '.tmp' buffer_size = 10 * 1000 buffer_text = b'\n' + b' ' * buffer_size @@ -331,10 +340,16 @@ def rewrite_asdf_file ( edited_text, orig_text, oname, fname ) : with open(oname,"r+b") as fd : orig_buffer = fd.read() + + # Compute asdf_blocks and block_index asdf_blocks = orig_buffer[len(orig_text):] out_bytes = edited_text + buffer_text + asdf_blocks # TODO Compute new block index!!!! + # This should be straight forward by figuring out. Compute the length of + # edited_text plus the length of the buffer_text, the compute that difference + # with the length of the orig_text. This difference will be added to each + # index in the block index list. with open(tmp_oname,"w+b") as fd : fd.write(out_bytes) @@ -405,6 +420,13 @@ def save_func ( fname, oname ) : print(f"Added a {diff} buffer of spaces between the YAML text and binary blocks.") print(f"{msg_delim}\n") else : + print(f"\n{msg_delim}") + print(f"Cannot write the text from '{fname}' to '{oname}'.") + print(f"There is too much edited text to write and the ASDF file") + print(f"is too large to rewrite.") + print("Another method must be used to edit '{oname}'.") + print(f"{msg_delim}\n") + ''' if os.stat(oname).st_size <= SMALL_FILE_SIZE : rewrite_asdf_file(edited_text,asdf_text,oname,fname) else: @@ -414,8 +436,7 @@ def save_func ( fname, oname ) : print(f"is too large to rewrite.") print("Another method must be used to edit '{oname}'.") print(f"{msg_delim}\n") - - # Output message to user. + ''' return From 592bc6efc3e12a05a02794fc7018b0b7290b9355 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Tue, 8 Sep 2020 14:29:29 -0400 Subject: [PATCH 11/47] Completed the 'save' portion of the 'edit' subcommand for asdftool. It now rewrites small files, with a buffer for future editing, as well as a newly computed block index. --- asdf/commands/edit.py | 125 +++++++++++++++++++++++++++++++----------- 1 file changed, 93 insertions(+), 32 deletions(-) diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index f95a7c882..0bfc96b47 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -5,6 +5,7 @@ import io import os +import struct import sys import asdf.constants as constants @@ -177,7 +178,7 @@ def validate_asdf_file ( fd ) : header_line = fd.read_until(b'\r?\n', 2, "newline", include=True) if ASDF_ID!=header_line[:len(ASDF_ID)] : - # Raise exception + # Maybe raise exception print("Invalid ASDF ID") sys.exit(1) @@ -221,7 +222,7 @@ def read_and_validate_yaml ( fd, fname ) : YAML_TOKEN = b'%YAML' token = fd.read(len(YAML_TOKEN)) if token != YAML_TOKEN : - # Raise exception + # Maybe raise exception print(f"Error: No YAML in '{fname}'") sys.exit(0) @@ -237,7 +238,7 @@ def read_and_validate_yaml ( fd, fname ) : # The YAML text must be converted to a stream. tree = yamlutil.load_tree(io.BytesIO(yaml_content)) if tree is None: - # Raise exception. + # Maybe raise exception. print("Error: 'yamlutil.load_tree' failed to return a tree.") sys.exist(1) @@ -310,7 +311,7 @@ def buffer_edited_text ( edited_text, orig_text ) : elif edited_text[-len(ldelim):]==ldelim : delim = ldelim else: - # Raise exception + # Mabye raise exception print("Unrecognized YAML delimiter ending the YAML text.") print(f"It should be {wdelim} or {ldelim}, but the") print(f"last {len(wdelim)} bytes are {edited_text[-len(wdelim):]}.") @@ -320,48 +321,116 @@ def buffer_edited_text ( edited_text, orig_text ) : buffered_text = edited_text[:-len(delim)] + b'\n' + b' '*(diff-1) + delim return buffered_text, diff-1 - # buffered_text = edited_text[:-len(delim)] + b' '*diff + delim - #return buffered_text, diff + +def add_buffer_to_new_text ( edited_text, buffer_size ) : + """ Adds buffer to edited text. + """ + wdelim = b'\r\n...\r\n' + ldelim = b'\n...\n' + if edited_text[-len(wdelim):]==wdelim : + delim = wdelim + elif edited_text[-len(ldelim):]==ldelim : + delim = ldelim + else: + # Maybe raise exception + print("Unrecognized YAML delimiter ending the YAML text.") + print(f"It should be {wdelim} or {ldelim}, but the") + print(f"last {len(wdelim)} bytes are {edited_text[-len(wdelim):]}.") + sys.exit(1) + + buf = b' ' * buffer_size + buffered_text = edited_text[:-len(delim)] + b'\n' + buf + delim + + return buffered_text + +def compute_block_index_blocks ( start, asdf_blocks ) : + """ Computes new block index and strips any data after last found block. + """ + if constants.BLOCK_MAGIC!=asdf_blocks[:len(constants.BLOCK_MAGIC)] : + return [], asdf_blocks # Not sure if this should happen + + # Minimum block header is + # 4 bytes of magic number + # 2 bytes of header length, after the length field (min 48) + # 4 bytes flag + # 4 bytes compression + # 8 bytes allocated size + # 8 bytes used (on disk) size + # 8 bytes data size + # 16 bytes checksum + bmlen = len(constants.BLOCK_MAGIC) + min_header = bmlen + 2 + 48 + uidx = 22 + bindex = [start] + k = 0 + while len(asdf_blocks) - k > min_header : + hsz = struct.unpack(">H",asdf_blocks[k+bmlen:k+bmlen+2])[0] + used = struct.unpack(">Q",asdf_blocks[k+uidx:k+uidx+8])[0] + k = k + bmlen + 2 + hsz + used + if constants.BLOCK_MAGIC==asdf_blocks[k:k+bmlen] : + bindex.append(k+start) + else : + break + return bindex, asdf_blocks[:k] + + +def write_block_index ( fd, index ) : + if len(index) < 1 : + return + + bindex_hdr = b'#ASDF BLOCK INDEX\n%YAML 1.1\n---\n' + fd.write(bindex_hdr) + for idx in index : + ostr = f'- {idx}\n' + fd.write(ostr.encode('utf-8')) + end = b'...' + fd.write(end) + return + def rewrite_asdf_file ( edited_text, orig_text, oname, fname ) : - """ TODO This function implentation needs to be finished. - Rewrite an ASDF file for too large edited YAML. The edited YAML, a buffer, + """ Rewrite an ASDF file for too large edited YAML. The edited YAML, a buffer, the blocks will be rewritten. A block index will also be rewritten. If a block index existed in the old file, it will have to be recomputed to because of the larger YAML size and buffer, which changes the location of the binary blocks. + + Parameters + ---------- + edited_text : the new YAML text to write out. + orig_text : the original YAML text to overwrite. + oname : the ASDF file to overwrite. + fname : the edit YAML to write to new file. """ - tmp_oname = oname + '.tmp' + tmp_oname = oname + '.tmp' # Save as a temp file, in case anything goes wrong. buffer_size = 10 * 1000 - buffer_text = b'\n' + b' ' * buffer_size - #print("Here") - #return + buffered_text = add_buffer_to_new_text(edited_text,buffer_size) with open(oname,"r+b") as fd : - orig_buffer = fd.read() + orig_buffer = fd.read() # Small enough to simply read the whole thing - # Compute asdf_blocks and block_index + # Get the binary blocks, compute the new block index, and strip old block + # index, if it exists. asdf_blocks = orig_buffer[len(orig_text):] - out_bytes = edited_text + buffer_text + asdf_blocks - - # TODO Compute new block index!!!! - # This should be straight forward by figuring out. Compute the length of - # edited_text plus the length of the buffer_text, the compute that difference - # with the length of the orig_text. This difference will be added to each - # index in the block index list. + index, asdf_blocks = compute_block_index_blocks(len(buffered_text),asdf_blocks) + out_bytes = buffered_text + asdf_blocks + # Write new file with edited text, buffer, and recomputed block index. with open(tmp_oname,"w+b") as fd : fd.write(out_bytes) + write_block_index(fd,index) + + # Rename temp file. os.rename(tmp_oname,oname) + + # Output message to user. delim = '*' * 70 print(f"\n{delim}") print(f"The text in '{fname}' was too large to simply overwrite the") print(f"text in '{oname}'. The file '{oname}' was rewritten to") - print(f"accommodate the larger text size. Also, {len(buffer_text):,} bytes") + print(f"accommodate the larger text size. Also, {buffer_size:,} bytes") print(f"as a buffer for the text in '{oname}' to allow for future edits.") - print(f"**** If a block index existed in the original ASDF,") - print(f" it is now invalidated. This needs to be fixed.") print(f"{delim}\n") def save_func ( fname, oname ) : @@ -420,13 +489,6 @@ def save_func ( fname, oname ) : print(f"Added a {diff} buffer of spaces between the YAML text and binary blocks.") print(f"{msg_delim}\n") else : - print(f"\n{msg_delim}") - print(f"Cannot write the text from '{fname}' to '{oname}'.") - print(f"There is too much edited text to write and the ASDF file") - print(f"is too large to rewrite.") - print("Another method must be used to edit '{oname}'.") - print(f"{msg_delim}\n") - ''' if os.stat(oname).st_size <= SMALL_FILE_SIZE : rewrite_asdf_file(edited_text,asdf_text,oname,fname) else: @@ -436,7 +498,6 @@ def save_func ( fname, oname ) : print(f"is too large to rewrite.") print("Another method must be used to edit '{oname}'.") print(f"{msg_delim}\n") - ''' return From df07474cba425c94a1f8f3b495453b108862cf01 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Wed, 9 Sep 2020 09:33:15 -0400 Subject: [PATCH 12/47] Adding a check to make sure the block checks don't go out of bounds. --- asdf/commands/edit.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index 0bfc96b47..04aa9a3c0 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -367,6 +367,8 @@ def compute_block_index_blocks ( start, asdf_blocks ) : hsz = struct.unpack(">H",asdf_blocks[k+bmlen:k+bmlen+2])[0] used = struct.unpack(">Q",asdf_blocks[k+uidx:k+uidx+8])[0] k = k + bmlen + 2 + hsz + used + if k+bmlen > len(asdf_blocks) : + break if constants.BLOCK_MAGIC==asdf_blocks[k:k+bmlen] : bindex.append(k+start) else : From 59aa9256a3a1a4db26778d44d11d5f89194aa00d Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Thu, 10 Sep 2020 12:02:44 -0400 Subject: [PATCH 13/47] Making formatting and usage changes based on feedback on pull request. --- asdf/asdf.py | 19 +++++----- asdf/commands/edit.py | 84 ++++++++++++++++++++++--------------------- 2 files changed, 54 insertions(+), 49 deletions(-) diff --git a/asdf/asdf.py b/asdf/asdf.py index ecc2eba45..6ab3e60f8 100644 --- a/asdf/asdf.py +++ b/asdf/asdf.py @@ -6,8 +6,6 @@ import warnings from pkg_resources import parse_version -import ipdb - import numpy as np from jsonschema import ValidationError @@ -37,8 +35,9 @@ from .tags.core import AsdfObject, Software, HistoryEntry, ExtensionMetadata -def _get_asdf_version_in_comments( comments ): - """ From the initial comments line in an ASDF file, capture the ASDF version. +def _get_asdf_version_in_comments(comments): + """ + From the initial comments line in an ASDF file, capture the ASDF version. """ for comment in comments: parts = comment.split() @@ -52,9 +51,10 @@ def _get_asdf_version_in_comments( comments ): return None -def _parse_asdf_comment_section( content ): - """ Parses the comment section, between the header line and the - Tree or first block. +def _parse_asdf_comment_section(content): + """ + Parses the comment section, between the header line and the + Tree or first block. """ comments = [] @@ -66,8 +66,9 @@ def _parse_asdf_comment_section( content ): return comments -def _parse_asdf_header_line ( line ) : - """ Parses the header line (first line) of an ASDF file and verifies +def _parse_asdf_header_line(line): + """ + Parses the header line (first line) of an ASDF file and verifies it is properly formatted. """ parts = line.split() diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index 04aa9a3c0..8af1a83da 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -31,7 +31,8 @@ class Edit(Command): @classmethod def setup_arguments(cls, subparsers): - """ Set up a command line argument parser for the edit subcommand. + """ + Set up a command line argument parser for the edit subcommand. """ desc_string = "Allows for easy editing of the YAML in an ASDF file. " \ "For edit mode, the YAML portion of an ASDF file is" \ @@ -69,12 +70,13 @@ def setup_arguments(cls, subparsers): @classmethod def run(cls, args): - """ Execute the edit subcommand. + """ + Execute the edit subcommand. """ return edit(args) -def is_yaml_file ( fname ) : +def is_yaml_file(fname): ''' Determines if a file is a YAML file based only on the file extension. @@ -89,7 +91,7 @@ def is_yaml_file ( fname ) : return True -def is_asdf_file ( fname ) : +def is_asdf_file(fname): ''' Determines if a file is ASDF based on file extension and the first 5 bytes of the file, which should be '#ASDF'. @@ -104,23 +106,23 @@ def is_asdf_file ( fname ) : return False with open(fname,"r+b") as fd : - first_string = "#ASDF" - first_line = fd.read(len(first_string)).decode('utf-8') - if first_string != first_line : + first_line = fd.read(len(constants.ASDF_MAGIC)) + if first_string != constants.ASDF_MAGIC: return False return True -def is_validate_path_and_ext ( fname, wanted_ext=None ) : - """ Validates the path exists and the extension is one wanted. +def is_validate_path_and_ext(fname, wanted_ext=None): + """ + Validates the path exists and the extension is one wanted. Parameters ---------- fname : The input file name. wanted_ext : List of extensions to check. """ - if not os.path.exists(fname) : + if not os.path.exists(fname): print(f"Error: No file '{fname}' exists.") return False @@ -136,8 +138,9 @@ def is_validate_path_and_ext ( fname, wanted_ext=None ) : return True -def is_validate_asdf_path ( fname ) : - """ Validates fname path exists and has extension '.asdf'. +def is_validate_asdf_path(fname): + """ + Validates fname path exists and has extension '.asdf'. Parameters ---------- @@ -150,8 +153,9 @@ def is_validate_asdf_path ( fname ) : return False -def is_validate_yaml_path ( fname ) : - """ Validates fname path exists and has extension '.yaml'. +def is_validate_yaml_path(fname): + """ + Validates fname path exists and has extension '.yaml'. Parameters ---------- @@ -164,39 +168,34 @@ def is_validate_yaml_path ( fname ) : return False -def validate_asdf_file ( fd ) : - """ Makes sure the header line is the expected one, as well +def validate_asdf_file(fd): + """ + Makes sure the header line is the expected one, as well as getting the optional comment line. Parameters ---------- fd : GenericFile """ - #global asdf_format_version - #global asdf_standard_version - ASDF_ID = b'#ASDF' header_line = fd.read_until(b'\r?\n', 2, "newline", include=True) - if ASDF_ID!=header_line[:len(ASDF_ID)] : + if constants.ASDF_MAGIC!=header_line[:len(constants.ASDF_MAGIC)] : # Maybe raise exception print("Invalid ASDF ID") sys.exit(1) - #asdf_format_version = _parse_asdf_header_line(header_line) # Maybe validate ASDF format version comment_section = fd.read_until( b'(%YAML)|(' + constants.BLOCK_MAGIC + b')', 5, "start of content", include=False, exception=False) - # Maybe do the following for more validate. But maybe not. - #comments = _parse_asdf_comment_section(comment_section) - #asdf_standard_version = _get_asdf_version_in_comments(comments) return header_line + comment_section -def open_and_validate_asdf ( fname ) : - """ Open and validate the ASDF file, as well as read in all the YAML +def open_and_validate_asdf(fname): + """ + Open and validate the ASDF file, as well as read in all the YAML that will be outputted to a YAML file. Parameters @@ -211,8 +210,9 @@ def open_and_validate_asdf ( fname ) : return fd, header_and_comment # Return GenericFile and ASDF header bytes. -def read_and_validate_yaml ( fd, fname ) : - """ Get the YAML text from an ASDF formatted file. +def read_and_validate_yaml(fd, fname): + """ + Get the YAML text from an ASDF formatted file. Parameters ---------- @@ -246,7 +246,7 @@ def read_and_validate_yaml ( fd, fname ) : return yaml_content -def edit_func ( fname, oname ) : +def edit_func(fname, oname): """ Creates a YAML file from an ASDF file. The YAML file will contain only the YAML from the ASDF file. The YAML text will be written to a YAML text file @@ -295,8 +295,9 @@ def edit_func ( fname, oname ) : return -def buffer_edited_text ( edited_text, orig_text ) : - """ There is more text in the original ASDF file than in the edited text, +def buffer_edited_text(edited_text, orig_text): + """ + There is more text in the original ASDF file than in the edited text, so we will buffer the edited text with spaces. """ diff = len(orig_text) - len(edited_text) @@ -322,8 +323,9 @@ def buffer_edited_text ( edited_text, orig_text ) : return buffered_text, diff-1 -def add_buffer_to_new_text ( edited_text, buffer_size ) : - """ Adds buffer to edited text. +def add_buffer_to_new_text(edited_text, buffer_size): + """ + Adds buffer to edited text. """ wdelim = b'\r\n...\r\n' ldelim = b'\n...\n' @@ -343,8 +345,9 @@ def add_buffer_to_new_text ( edited_text, buffer_size ) : return buffered_text -def compute_block_index_blocks ( start, asdf_blocks ) : - """ Computes new block index and strips any data after last found block. +def compute_block_index_blocks(start, asdf_blocks): + """ + Computes new block index and strips any data after last found block. """ if constants.BLOCK_MAGIC!=asdf_blocks[:len(constants.BLOCK_MAGIC)] : return [], asdf_blocks # Not sure if this should happen @@ -376,7 +379,7 @@ def compute_block_index_blocks ( start, asdf_blocks ) : return bindex, asdf_blocks[:k] -def write_block_index ( fd, index ) : +def write_block_index(fd, index): if len(index) < 1 : return @@ -390,8 +393,9 @@ def write_block_index ( fd, index ) : return -def rewrite_asdf_file ( edited_text, orig_text, oname, fname ) : - """ Rewrite an ASDF file for too large edited YAML. The edited YAML, a buffer, +def rewrite_asdf_file(edited_text, orig_text, oname, fname): + """ + Rewrite an ASDF file for too large edited YAML. The edited YAML, a buffer, the blocks will be rewritten. A block index will also be rewritten. If a block index existed in the old file, it will have to be recomputed to because of the larger YAML size and buffer, which changes the location of @@ -435,7 +439,7 @@ def rewrite_asdf_file ( edited_text, orig_text, oname, fname ) : print(f"as a buffer for the text in '{oname}' to allow for future edits.") print(f"{delim}\n") -def save_func ( fname, oname ) : +def save_func(fname, oname): """ Checks to makes sure a corresponding ASDF file exists. This is done by seeing if a file of the same name with '.asdf' as an extension exists. @@ -503,7 +507,7 @@ def save_func ( fname, oname ) : return -def edit ( args ) : +def edit(args): """ Implode a given ASDF file, which may reference external data, back into a single ASDF file. From 0f2a04d66df9ae7b542552d643a41a4429c36ce5 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Fri, 11 Sep 2020 09:09:06 -0400 Subject: [PATCH 14/47] Removing debug tracing. --- asdf/asdf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/asdf/asdf.py b/asdf/asdf.py index 6ab3e60f8..34f4a372c 100644 --- a/asdf/asdf.py +++ b/asdf/asdf.py @@ -908,7 +908,6 @@ def _open_asdf(cls, self, fd, uri=None, mode='r', self._blocks.read_block_index(fd, self) # Function 7 References - ipdb.set_trace() tree = reference.find_references(tree, self) # Function 8 Schemas From dc6cba31b87d09acc89dfba48bb2febd79f6f262 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Fri, 11 Sep 2020 09:17:17 -0400 Subject: [PATCH 15/47] Removing comment. --- asdf/asdf.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/asdf/asdf.py b/asdf/asdf.py index 34f4a372c..a1c8d3968 100644 --- a/asdf/asdf.py +++ b/asdf/asdf.py @@ -806,10 +806,6 @@ def _open_asdf(cls, self, fd, uri=None, mode='r', ignore_missing_extensions=False, **kwargs): """Attempt to populate AsdfFile data from file-like object""" - # Generally, I think this function should be made smaller. There are - # many steps being taken here that can be broken down, with expressible - # function names to make clearer what this function does. - # Make sure arguments aren't contradictory if strict_extension_check and ignore_missing_extensions: raise ValueError( From 9fb3a8874236f2473d542d4cc6f1b7ca3deed0c3 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Mon, 14 Sep 2020 16:03:50 -0400 Subject: [PATCH 16/47] Integrating feedback from the pull request. --- asdf/asdf.py | 80 ++++++++++++++++--------------------------- asdf/commands/edit.py | 28 ++++++--------- 2 files changed, 41 insertions(+), 67 deletions(-) diff --git a/asdf/asdf.py b/asdf/asdf.py index a1c8d3968..eeb6eb43d 100644 --- a/asdf/asdf.py +++ b/asdf/asdf.py @@ -35,53 +35,6 @@ from .tags.core import AsdfObject, Software, HistoryEntry, ExtensionMetadata -def _get_asdf_version_in_comments(comments): - """ - From the initial comments line in an ASDF file, capture the ASDF version. - """ - for comment in comments: - parts = comment.split() - if len(parts) == 2 and parts[0] == constants.ASDF_STANDARD_COMMENT: - try: - version = versioning.AsdfVersion(parts[1].decode('ascii')) - except ValueError: - pass - else: - return version - - return None - -def _parse_asdf_comment_section(content): - """ - Parses the comment section, between the header line and the - Tree or first block. - """ - comments = [] - - lines = content.splitlines() - for line in lines: - if not line.startswith(b'#'): - raise ValueError("Invalid content between header and tree") - comments.append(line[1:].strip()) - - return comments - -def _parse_asdf_header_line(line): - """ - Parses the header line (first line) of an ASDF file and verifies - it is properly formatted. - """ - parts = line.split() - if len(parts) != 2 or parts[0] != constants.ASDF_MAGIC: - raise ValueError("Does not appear to be a ASDF file.") - - try: - version = versioning.AsdfVersion(parts[1].decode('ascii')) - except ValueError: - raise ValueError("Unparseable version in ASDF file: {0}".format(parts[1])) - - return version - def get_asdf_library_info(): """ Get information about asdf to include in the asdf_library entry @@ -779,7 +732,16 @@ def _parse_header_line(cls, line): """ Parses the header line in a ASDF file to obtain the ASDF version. """ - return _parse_asdf_header_line(line) + parts = line.split() + if len(parts) != 2 or parts[0] != constants.ASDF_MAGIC: + raise ValueError("Does not appear to be a ASDF file.") + + try: + version = versioning.AsdfVersion(parts[1].decode('ascii')) + except ValueError: + raise ValueError("Unparseable version in ASDF file: {0}".format(parts[1])) + + return version @classmethod def _parse_comment_section(cls, content): @@ -787,14 +749,32 @@ def _parse_comment_section(cls, content): Parses the comment section, between the header line and the Tree or first block. """ - return _parse_asdf_comment_section(content) + comments = [] + + lines = content.splitlines() + for line in lines: + if not line.startswith(b'#'): + raise ValueError("Invalid content between header and tree") + comments.append(line[1:].strip()) + + return comments @classmethod def _find_asdf_version_in_comments(cls, comments): """ From the initial comments line in an ASDF file, capture the ASDF version. """ - return _get_asdf_version_in_comments(comments) + for comment in comments: + parts = comment.split() + if len(parts) == 2 and parts[0] == constants.ASDF_STANDARD_COMMENT: + try: + version = versioning.AsdfVersion(parts[1].decode('ascii')) + except ValueError: + pass + else: + return version + + return None @classmethod def _open_asdf(cls, self, fd, uri=None, mode='r', diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index 8af1a83da..3124835f7 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -10,10 +10,6 @@ import asdf.constants as constants -from asdf.asdf import _parse_asdf_header_line -from asdf.asdf import _parse_asdf_comment_section -from asdf.asdf import _get_asdf_version_in_comments - from .. import AsdfFile from .. import generic_io from .. import reference @@ -24,10 +20,6 @@ __all__ = ['edit'] -#asdf_format_version = None -#asdf_standard_version = None - - class Edit(Command): @classmethod def setup_arguments(cls, subparsers): @@ -107,7 +99,7 @@ def is_asdf_file(fname): with open(fname,"r+b") as fd : first_line = fd.read(len(constants.ASDF_MAGIC)) - if first_string != constants.ASDF_MAGIC: + if not first_string.startswith(constants.ASDF_MAGIC): return False return True @@ -168,7 +160,7 @@ def is_validate_yaml_path(fname): return False -def validate_asdf_file(fd): +def check_asdf_header(fd): """ Makes sure the header line is the expected one, as well as getting the optional comment line. @@ -179,7 +171,7 @@ def validate_asdf_file(fd): """ header_line = fd.read_until(b'\r?\n', 2, "newline", include=True) - if constants.ASDF_MAGIC!=header_line[:len(constants.ASDF_MAGIC)] : + if not header_line.startswith(constants.ASDF_MAGIC): # Maybe raise exception print("Invalid ASDF ID") sys.exit(1) @@ -193,7 +185,7 @@ def validate_asdf_file(fd): return header_line + comment_section -def open_and_validate_asdf(fname): +def open_and_check_asdf_header(fname): """ Open and validate the ASDF file, as well as read in all the YAML that will be outputted to a YAML file. @@ -206,7 +198,7 @@ def open_and_validate_asdf(fname): fd = generic_io.get_file(fullpath, mode="r") # Read the ASDF header and optional comments section - header_and_comment = validate_asdf_file(fd) + header_and_comment = check_asdf_header(fd) return fd, header_and_comment # Return GenericFile and ASDF header bytes. @@ -261,7 +253,7 @@ def edit_func(fname, oname): return False # Validate input file is an ASDF file. - fd, asdf_text = open_and_validate_asdf(fname) + fd, asdf_text = open_and_check_asdf_header(fname) # Read and validate the YAML of an ASDF file. yaml_text = read_and_validate_yaml(fd,fname) @@ -466,13 +458,13 @@ def save_func(fname, oname): return False # Validate input file is an ASDF formatted YAML. - ifd, iasdf_text = open_and_validate_asdf(fname) + ifd, iasdf_text = open_and_check_asdf_header(fname) iyaml_text = read_and_validate_yaml(ifd,fname) ifd.close() edited_text = iasdf_text + iyaml_text # Get text from ASDF file. - ofd, oasdf_text = open_and_validate_asdf(oname) + ofd, oasdf_text = open_and_check_asdf_header(oname) oyaml_text = read_and_validate_yaml(ofd,oname) ofd.close() asdf_text = oasdf_text + oyaml_text @@ -492,9 +484,11 @@ def save_func(fname, oname): fd.write(buffered_text) print(f"\n{msg_delim}") print(f"The edited text in '{fname}' was written to '{oname}'") - print(f"Added a {diff} buffer of spaces between the YAML text and binary blocks.") + print(f"Added a '\n' and {diff} buffer of spaces between the YAML text and binary blocks.") print(f"{msg_delim}\n") else : + # Should pass trees and figure out how to replace tree in output AsdfFile + # with the input tree, then the output can simply call 'write_to'. if os.stat(oname).st_size <= SMALL_FILE_SIZE : rewrite_asdf_file(edited_text,asdf_text,oname,fname) else: From d2a6c7c48d0602d7cb888fb829050923c0ee3a89 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Thu, 17 Sep 2020 08:56:21 -0400 Subject: [PATCH 17/47] Changing edit subcommand based on pull request feedback. --- asdf/commands/edit.py | 182 +++++++++++++++++++++--------------------- 1 file changed, 89 insertions(+), 93 deletions(-) diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index 3124835f7..7bad3a3d1 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -78,7 +78,7 @@ def is_yaml_file(fname): ''' base, ext = os.path.splitext(fname) - if '.yaml' != ext : + if '.yaml' != ext: return False return True @@ -94,10 +94,10 @@ def is_asdf_file(fname): ''' base, ext = os.path.splitext(fname) - if '.asdf' != ext : + if '.asdf' != ext: return False - with open(fname,"r+b") as fd : + with open(fname,"r+b") as fd: first_line = fd.read(len(constants.ASDF_MAGIC)) if not first_string.startswith(constants.ASDF_MAGIC): return False @@ -139,7 +139,7 @@ def is_validate_asdf_path(fname): fname : The input file name. """ ext = ['.asdf'] - if is_validate_path_and_ext(fname,ext) : + if is_validate_path_and_ext(fname, ext): return True print(f"Error: '{fname}' should have extension '{ext[0]}'") return False @@ -154,7 +154,7 @@ def is_validate_yaml_path(fname): fname : The input file name. """ ext = ['.yaml'] - if is_validate_path_and_ext(fname,ext) : + if is_validate_path_and_ext(fname, ext): return True print(f"Error: '{fname}' should have extension '{ext[0]}'") return False @@ -184,6 +184,7 @@ def check_asdf_header(fd): exception=False) return header_line + comment_section + def open_and_check_asdf_header(fname): """ @@ -200,7 +201,8 @@ def open_and_check_asdf_header(fname): # Read the ASDF header and optional comments section header_and_comment = check_asdf_header(fd) - return fd, header_and_comment # Return GenericFile and ASDF header bytes. + return fd, header_and_comment # Return GenericFile and ASDF header bytes. + def read_and_validate_yaml(fd, fname): """ @@ -213,7 +215,7 @@ def read_and_validate_yaml(fd, fname): """ YAML_TOKEN = b'%YAML' token = fd.read(len(YAML_TOKEN)) - if token != YAML_TOKEN : + if token != YAML_TOKEN: # Maybe raise exception print(f"Error: No YAML in '{fname}'") sys.exit(0) @@ -234,7 +236,7 @@ def read_and_validate_yaml(fd, fname): print("Error: 'yamlutil.load_tree' failed to return a tree.") sys.exist(1) - schema.validate(tree, None) # Failure raises and exception. + schema.validate(tree, None) # Failure raises an exception. return yaml_content @@ -249,7 +251,7 @@ def edit_func(fname, oname): fname : The input ASDF file name. oname : The output YAML file name. """ - if not is_validate_asdf_path(fname) : + if not is_validate_asdf_path(fname): return False # Validate input file is an ASDF file. @@ -259,13 +261,13 @@ def edit_func(fname, oname): yaml_text = read_and_validate_yaml(fd,fname) # Open a YAML file for the ASDF YAML. - if not is_yaml_file(oname) : + if not is_yaml_file(oname): # Raise an exception print(f"Error: '{oname}' must have '.yaml' extension.") sys.exit(1) # Write the YAML for the original ASDF file. - with open(oname,"wb") as ofd : + with open(oname,"wb") as ofd: ofd.write(asdf_text) ofd.write(yaml_text) @@ -293,15 +295,15 @@ def buffer_edited_text(edited_text, orig_text): so we will buffer the edited text with spaces. """ diff = len(orig_text) - len(edited_text) - if diff<1 : + if diff < 1: print("Error: shouldn't be here.") sys.exit(1) wdelim = b'\r\n...\r\n' ldelim = b'\n...\n' - if edited_text[-len(wdelim):]==wdelim : + if edited_text[-len(wdelim) :]==wdelim: delim = wdelim - elif edited_text[-len(ldelim):]==ldelim : + elif edited_text[-len(ldelim) :]==ldelim: delim = ldelim else: # Mabye raise exception @@ -311,8 +313,8 @@ def buffer_edited_text(edited_text, orig_text): sys.exit(1) # May not be correct. If on Windows use '\r\n'. - buffered_text = edited_text[:-len(delim)] + b'\n' + b' '*(diff-1) + delim - return buffered_text, diff-1 + buffered_text = edited_text[: -len(delim)] + b'\n' + b' '*(diff - 1) + delim + return buffered_text, diff - 1 def add_buffer_to_new_text(edited_text, buffer_size): @@ -321,9 +323,9 @@ def add_buffer_to_new_text(edited_text, buffer_size): """ wdelim = b'\r\n...\r\n' ldelim = b'\n...\n' - if edited_text[-len(wdelim):]==wdelim : + if edited_text[-len(wdelim) :]==wdelim: delim = wdelim - elif edited_text[-len(ldelim):]==ldelim : + elif edited_text[-len(ldelim) :]==ldelim: delim = ldelim else: # Maybe raise exception @@ -333,57 +335,42 @@ def add_buffer_to_new_text(edited_text, buffer_size): sys.exit(1) buf = b' ' * buffer_size - buffered_text = edited_text[:-len(delim)] + b'\n' + buf + delim + buffered_text = edited_text[: -len(delim)] + b'\n' + buf + delim return buffered_text - -def compute_block_index_blocks(start, asdf_blocks): - """ - Computes new block index and strips any data after last found block. - """ - if constants.BLOCK_MAGIC!=asdf_blocks[:len(constants.BLOCK_MAGIC)] : - return [], asdf_blocks # Not sure if this should happen - - # Minimum block header is - # 4 bytes of magic number - # 2 bytes of header length, after the length field (min 48) - # 4 bytes flag - # 4 bytes compression - # 8 bytes allocated size - # 8 bytes used (on disk) size - # 8 bytes data size - # 16 bytes checksum - bmlen = len(constants.BLOCK_MAGIC) - min_header = bmlen + 2 + 48 - uidx = 22 - bindex = [start] - k = 0 - while len(asdf_blocks) - k > min_header : - hsz = struct.unpack(">H",asdf_blocks[k+bmlen:k+bmlen+2])[0] - used = struct.unpack(">Q",asdf_blocks[k+uidx:k+uidx+8])[0] - k = k + bmlen + 2 + hsz + used - if k+bmlen > len(asdf_blocks) : - break - if constants.BLOCK_MAGIC==asdf_blocks[k:k+bmlen] : - bindex.append(k+start) - else : - break - return bindex, asdf_blocks[:k] - + def write_block_index(fd, index): - if len(index) < 1 : + if len(index) < 1: return - bindex_hdr = b'#ASDF BLOCK INDEX\n%YAML 1.1\n---\n' + bindex_hdr = b"#ASDF BLOCK INDEX\n%YAML 1.1\n---\n" fd.write(bindex_hdr) - for idx in index : + for idx in index: ostr = f'- {idx}\n' fd.write(ostr.encode('utf-8')) end = b'...' fd.write(end) return +def get_next_block_header(fd): + # Minimum block header is + # 4 bytes of magic number + # 2 bytes of header length, after the length field (min 48) + # 4 bytes flag + # 4 bytes compression + # 8 bytes allocated size + # 8 bytes used (on disk) size + # 8 bytes data size + # 16 bytes checksum + blk_header = fd.read(6) + if len(blk_header) != 6: + return None + if not blk_header.startswith(constants.BLOCK_MAGIC): + return None + hsz = struct.unpack(">H",blk_header[4:6])[0] + header = fd.read(hsz) + return blk_header + header def rewrite_asdf_file(edited_text, orig_text, oname, fname): """ @@ -401,23 +388,44 @@ def rewrite_asdf_file(edited_text, orig_text, oname, fname): fname : the edit YAML to write to new file. """ - tmp_oname = oname + '.tmp' # Save as a temp file, in case anything goes wrong. + tmp_oname = oname + '.tmp' # Save as a temp file, in case anything goes wrong. buffer_size = 10 * 1000 buffered_text = add_buffer_to_new_text(edited_text,buffer_size) - with open(oname,"r+b") as fd : - orig_buffer = fd.read() # Small enough to simply read the whole thing + ifd = open(oname,"r+b") # Open old ASDF to get binary blocks + ifd.seek(len(orig_text)) + + ofd = open(tmp_oname,"w+b") # Open temp file to write + ofd.write(buffered_text) # Write edited YAML - # Get the binary blocks, compute the new block index, and strip old block - # index, if it exists. - asdf_blocks = orig_buffer[len(orig_text):] - index, asdf_blocks = compute_block_index_blocks(len(buffered_text),asdf_blocks) - out_bytes = buffered_text + asdf_blocks + current_location = len(buffered_text) + block_index = [] + alloc_loc = 14 # 4 bytes of block ID, 2 blocks of size, 8 blocks into header + block_chunk = 2048 + while True: + next_block = get_next_block_header(ifd) + if next_block is None: + break + + # Get block size on disk + alloc = struct.unpack(">Q",next_block[alloc_loc:alloc_loc+8])[0] + + # Save block location for block index + block_index.append(current_location) + current_location = current_location + len(next_block) + alloc - # Write new file with edited text, buffer, and recomputed block index. - with open(tmp_oname,"w+b") as fd : - fd.write(out_bytes) - write_block_index(fd,index) + # Copy block + ofd.write(next_block) + while alloc >= block_chunk: + chunk = ifd.read(block_chunk) + ofd.write(chunk) + alloc -= block_chunk + if alloc>0: + chunk = ifd.read(alloc) + ofd.write(chunk) + + + write_block_index(ofd,block_index) # Rename temp file. os.rename(tmp_oname,oname) @@ -427,8 +435,9 @@ def rewrite_asdf_file(edited_text, orig_text, oname, fname): print(f"\n{delim}") print(f"The text in '{fname}' was too large to simply overwrite the") print(f"text in '{oname}'. The file '{oname}' was rewritten to") - print(f"accommodate the larger text size. Also, {buffer_size:,} bytes") - print(f"as a buffer for the text in '{oname}' to allow for future edits.") + print(f"accommodate the larger text size.") + print(f"Also, added a '\\n' and {buffer_size:,} spaces as a buffer for") + print(f"the text in '{oname}' to allow for future edits.") print(f"{delim}\n") def save_func(fname, oname): @@ -447,9 +456,6 @@ def save_func(fname, oname): fname : The input YAML file. oname : The output ASDF file name. """ - _1G = 1000**3 # 1 gig - C = 1 # constant multiple of gig - SMALL_FILE_SIZE = C * _1G if not is_validate_yaml_path(fname): return False @@ -472,32 +478,22 @@ def save_func(fname, oname): # Compare text sizes and maybe output. # There are three cases: msg_delim = '*' * 70 - if len(edited_text) == len(asdf_text) : - with open(oname,"r+b") as fd : + if len(edited_text) == len(asdf_text): + with open(oname,"r+b") as fd: fd.write(edited_text) print(f"\n{msg_delim}") print(f"The edited text in '{fname}' was written to '{oname}'") print(f"{msg_delim}\n") - elif len(edited_text) < len(asdf_text) : + elif len(edited_text) < len(asdf_text): buffered_text, diff = buffer_edited_text(edited_text,asdf_text) - with open(oname,"r+b") as fd : + with open(oname,"r+b") as fd: fd.write(buffered_text) print(f"\n{msg_delim}") print(f"The edited text in '{fname}' was written to '{oname}'") - print(f"Added a '\n' and {diff} buffer of spaces between the YAML text and binary blocks.") + print(f"Added a '\\n' and {diff} buffer of spaces between the YAML text and binary blocks.") print(f"{msg_delim}\n") - else : - # Should pass trees and figure out how to replace tree in output AsdfFile - # with the input tree, then the output can simply call 'write_to'. - if os.stat(oname).st_size <= SMALL_FILE_SIZE : - rewrite_asdf_file(edited_text,asdf_text,oname,fname) - else: - print(f"\n{msg_delim}") - print(f"Cannot write the text from '{fname}' to '{oname}'.") - print(f"There is too much edited text to write and the ASDF file") - print(f"is too large to rewrite.") - print("Another method must be used to edit '{oname}'.") - print(f"{msg_delim}\n") + else: + rewrite_asdf_file(edited_text,asdf_text,oname,fname) return @@ -510,11 +506,11 @@ def edit(args): ---------- args : The command line arguments. """ - if args.edit : + if args.edit: return edit_func(args.fname,args.oname) - elif args.save : + elif args.save: return save_func(args.fname,args.oname) - else : + else: return print("Invalid arguments") From 71874671676a1fb1ecde666ac5c28b9d1616b9df Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Thu, 17 Sep 2020 09:43:08 -0400 Subject: [PATCH 18/47] Correcting python formatting based on flake8 report. --- asdf/commands/edit.py | 187 ++++++++++++++++++------------------------ 1 file changed, 79 insertions(+), 108 deletions(-) diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index 7bad3a3d1..1b71cd661 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -10,9 +10,7 @@ import asdf.constants as constants -from .. import AsdfFile from .. import generic_io -from .. import reference from .. import schema from .. import yamlutil @@ -23,7 +21,7 @@ class Edit(Command): @classmethod def setup_arguments(cls, subparsers): - """ + """ Set up a command line argument parser for the edit subcommand. """ desc_string = "Allows for easy editing of the YAML in an ASDF file. " \ @@ -34,26 +32,37 @@ def setup_arguments(cls, subparsers): # Set up the parser parser = subparsers.add_parser( - str("edit"), help="Edit YAML portion of an ASDF file.", + str("edit"), + help="Edit YAML portion of an ASDF file.", description=desc_string) # Need an input file parser.add_argument( - '--infile', '-f', type=str, required=True, dest='fname', + '--infile', '-f', + type=str, + required=True, + dest='fname', help="Input file (ASDF for -e option, YAML for -s option") # Need an output file parser.add_argument( - '--outfile', '-o', type=str, required=True, dest='oname', + '--outfile', '-o', + type=str, + required=True, + dest='oname', help="Output file (YAML for -e option, ASDF for -s option") # The edit is either being performed or saved group = parser.add_mutually_exclusive_group(required=True) group.add_argument( - '-s',action='store_true',dest='save', + '-s', + action='store_true', + dest='save', help="Saves a YAML text file to its ASDF file. Requires an ASDF input file.") group.add_argument( - '-e',action='store_true',dest='edit', + '-e', + action='store_true', + dest='edit', help="Create a YAML text file for a ASDF file. Requires a YAML input file.") parser.set_defaults(func=cls.run) @@ -62,20 +71,20 @@ def setup_arguments(cls, subparsers): @classmethod def run(cls, args): - """ + """ Execute the edit subcommand. """ return edit(args) def is_yaml_file(fname): - ''' + """ Determines if a file is a YAML file based only on the file extension. Parameters ---------- fname : The input file name. - ''' + """ base, ext = os.path.splitext(fname) if '.yaml' != ext: @@ -83,30 +92,8 @@ def is_yaml_file(fname): return True -def is_asdf_file(fname): - ''' - Determines if a file is ASDF based on file extension and the first - 5 bytes of the file, which should be '#ASDF'. - - Parameters - ---------- - fname : The input file name. - ''' - - base, ext = os.path.splitext(fname) - if '.asdf' != ext: - return False - - with open(fname,"r+b") as fd: - first_line = fd.read(len(constants.ASDF_MAGIC)) - if not first_string.startswith(constants.ASDF_MAGIC): - return False - - return True - - def is_validate_path_and_ext(fname, wanted_ext=None): - """ + """ Validates the path exists and the extension is one wanted. Parameters @@ -119,9 +106,9 @@ def is_validate_path_and_ext(fname, wanted_ext=None): return False # Simply validates the path existence - if wanted_ext is None: + if wanted_ext is None: return True - + # Make sure the extension is one desired. base, ext = os.path.splitext(fname) if ext not in wanted_ext: @@ -131,7 +118,7 @@ def is_validate_path_and_ext(fname, wanted_ext=None): def is_validate_asdf_path(fname): - """ + """ Validates fname path exists and has extension '.asdf'. Parameters @@ -139,14 +126,14 @@ def is_validate_asdf_path(fname): fname : The input file name. """ ext = ['.asdf'] - if is_validate_path_and_ext(fname, ext): + if is_validate_path_and_ext(fname, ext): return True print(f"Error: '{fname}' should have extension '{ext[0]}'") return False def is_validate_yaml_path(fname): - """ + """ Validates fname path exists and has extension '.yaml'. Parameters @@ -154,14 +141,14 @@ def is_validate_yaml_path(fname): fname : The input file name. """ ext = ['.yaml'] - if is_validate_path_and_ext(fname, ext): + if is_validate_path_and_ext(fname, ext): return True print(f"Error: '{fname}' should have extension '{ext[0]}'") return False def check_asdf_header(fd): - """ + """ Makes sure the header line is the expected one, as well as getting the optional comment line. @@ -172,22 +159,21 @@ def check_asdf_header(fd): header_line = fd.read_until(b'\r?\n', 2, "newline", include=True) if not header_line.startswith(constants.ASDF_MAGIC): - # Maybe raise exception print("Invalid ASDF ID") sys.exit(1) - + # Maybe validate ASDF format version - comment_section = fd.read_until( b'(%YAML)|(' + constants.BLOCK_MAGIC + b')', - 5, - "start of content", - include=False, + comment_section = fd.read_until( b'(%YAML)|(' + constants.BLOCK_MAGIC + b')', + 5, + "start of content", + include=False, exception=False) - return header_line + comment_section + return header_line + comment_section + - def open_and_check_asdf_header(fname): - """ + """ Open and validate the ASDF file, as well as read in all the YAML that will be outputted to a YAML file. @@ -203,9 +189,9 @@ def open_and_check_asdf_header(fname): return fd, header_and_comment # Return GenericFile and ASDF header bytes. - + def read_and_validate_yaml(fd, fname): - """ + """ Get the YAML text from an ASDF formatted file. Parameters @@ -216,15 +202,14 @@ def read_and_validate_yaml(fd, fname): YAML_TOKEN = b'%YAML' token = fd.read(len(YAML_TOKEN)) if token != YAML_TOKEN: - # Maybe raise exception print(f"Error: No YAML in '{fname}'") sys.exit(0) - + # Get YAML reader and content - reader = fd.reader_until(constants.YAML_END_MARKER_REGEX, - 7, - 'End of YAML marker', - include=True, + reader = fd.reader_until(constants.YAML_END_MARKER_REGEX, + 7, + 'End of YAML marker', + include=True, initial_content=token) yaml_content = reader.read() @@ -232,10 +217,9 @@ def read_and_validate_yaml(fd, fname): # The YAML text must be converted to a stream. tree = yamlutil.load_tree(io.BytesIO(yaml_content)) if tree is None: - # Maybe raise exception. print("Error: 'yamlutil.load_tree' failed to return a tree.") sys.exist(1) - + schema.validate(tree, None) # Failure raises an exception. return yaml_content @@ -258,23 +242,21 @@ def edit_func(fname, oname): fd, asdf_text = open_and_check_asdf_header(fname) # Read and validate the YAML of an ASDF file. - yaml_text = read_and_validate_yaml(fd,fname) + yaml_text = read_and_validate_yaml(fd, fname) # Open a YAML file for the ASDF YAML. - if not is_yaml_file(oname): - # Raise an exception - print(f"Error: '{oname}' must have '.yaml' extension.") + if not is_yaml_file(oname): sys.exit(1) # Write the YAML for the original ASDF file. - with open(oname,"wb") as ofd: + with open(oname, "wb") as ofd: ofd.write(asdf_text) ofd.write(yaml_text) # Output message to user. delim = '*' * 70 print(f"\n{delim}") - print("ASDF formatting and YAML schema validated.") + print("ASDF formatting and YAML schema validated.") print(f"The text portion of '{fname}' is written to:") print(f" '{oname}'") print(f"The file '{oname}' can be edited using your favorite text editor.") @@ -290,10 +272,10 @@ def edit_func(fname, oname): return def buffer_edited_text(edited_text, orig_text): - """ + """ There is more text in the original ASDF file than in the edited text, so we will buffer the edited text with spaces. - """ + """ diff = len(orig_text) - len(edited_text) if diff < 1: print("Error: shouldn't be here.") @@ -306,7 +288,6 @@ def buffer_edited_text(edited_text, orig_text): elif edited_text[-len(ldelim) :]==ldelim: delim = ldelim else: - # Mabye raise exception print("Unrecognized YAML delimiter ending the YAML text.") print(f"It should be {wdelim} or {ldelim}, but the") print(f"last {len(wdelim)} bytes are {edited_text[-len(wdelim):]}.") @@ -314,11 +295,11 @@ def buffer_edited_text(edited_text, orig_text): # May not be correct. If on Windows use '\r\n'. buffered_text = edited_text[: -len(delim)] + b'\n' + b' '*(diff - 1) + delim - return buffered_text, diff - 1 + return buffered_text, diff - 1 def add_buffer_to_new_text(edited_text, buffer_size): - """ + """ Adds buffer to edited text. """ wdelim = b'\r\n...\r\n' @@ -328,7 +309,6 @@ def add_buffer_to_new_text(edited_text, buffer_size): elif edited_text[-len(ldelim) :]==ldelim: delim = ldelim else: - # Maybe raise exception print("Unrecognized YAML delimiter ending the YAML text.") print(f"It should be {wdelim} or {ldelim}, but the") print(f"last {len(wdelim)} bytes are {edited_text[-len(wdelim):]}.") @@ -337,8 +317,8 @@ def add_buffer_to_new_text(edited_text, buffer_size): buf = b' ' * buffer_size buffered_text = edited_text[: -len(delim)] + b'\n' + buf + delim - return buffered_text - + return buffered_text + def write_block_index(fd, index): if len(index) < 1: @@ -354,7 +334,7 @@ def write_block_index(fd, index): return def get_next_block_header(fd): - # Minimum block header is + # Block header structure: # 4 bytes of magic number # 2 bytes of header length, after the length field (min 48) # 4 bytes flag @@ -368,16 +348,16 @@ def get_next_block_header(fd): return None if not blk_header.startswith(constants.BLOCK_MAGIC): return None - hsz = struct.unpack(">H",blk_header[4:6])[0] + hsz = struct.unpack(">H", blk_header[4:6])[0] header = fd.read(hsz) return blk_header + header def rewrite_asdf_file(edited_text, orig_text, oname, fname): - """ + """ Rewrite an ASDF file for too large edited YAML. The edited YAML, a buffer, the blocks will be rewritten. A block index will also be rewritten. If a - block index existed in the old file, it will have to be recomputed to - because of the larger YAML size and buffer, which changes the location of + block index existed in the old file, it will have to be recomputed to + because of the larger YAML size and buffer, which changes the location of the binary blocks. Parameters @@ -387,17 +367,17 @@ def rewrite_asdf_file(edited_text, orig_text, oname, fname): oname : the ASDF file to overwrite. fname : the edit YAML to write to new file. """ - + tmp_oname = oname + '.tmp' # Save as a temp file, in case anything goes wrong. buffer_size = 10 * 1000 - buffered_text = add_buffer_to_new_text(edited_text,buffer_size) + buffered_text = add_buffer_to_new_text(edited_text, buffer_size) - ifd = open(oname,"r+b") # Open old ASDF to get binary blocks - ifd.seek(len(orig_text)) + ifd = open(oname, "r+b") # Open old ASDF to get binary blocks + ifd.seek(len(orig_text)) - ofd = open(tmp_oname,"w+b") # Open temp file to write + ofd = open(tmp_oname, "w+b") # Open temp file to write ofd.write(buffered_text) # Write edited YAML - + current_location = len(buffered_text) block_index = [] alloc_loc = 14 # 4 bytes of block ID, 2 blocks of size, 8 blocks into header @@ -408,7 +388,7 @@ def rewrite_asdf_file(edited_text, orig_text, oname, fname): break # Get block size on disk - alloc = struct.unpack(">Q",next_block[alloc_loc:alloc_loc+8])[0] + alloc = struct.unpack(">Q", next_block[alloc_loc:alloc_loc+8])[0] # Save block location for block index block_index.append(current_location) @@ -425,10 +405,10 @@ def rewrite_asdf_file(edited_text, orig_text, oname, fname): ofd.write(chunk) - write_block_index(ofd,block_index) + write_block_index(ofd, block_index) # Rename temp file. - os.rename(tmp_oname,oname) + os.rename(tmp_oname, oname) # Output message to user. delim = '*' * 70 @@ -442,7 +422,7 @@ def rewrite_asdf_file(edited_text, orig_text, oname, fname): def save_func(fname, oname): """ - Checks to makes sure a corresponding ASDF file exists. This is done by + Checks to makes sure a corresponding ASDF file exists. This is done by seeing if a file of the same name with '.asdf' as an extension exists. Checks to makes sure fname is a valid YAML file. If the YAML text is smaller than the YAML text in the ASDF file @@ -465,35 +445,35 @@ def save_func(fname, oname): # Validate input file is an ASDF formatted YAML. ifd, iasdf_text = open_and_check_asdf_header(fname) - iyaml_text = read_and_validate_yaml(ifd,fname) + iyaml_text = read_and_validate_yaml(ifd, fname) ifd.close() edited_text = iasdf_text + iyaml_text # Get text from ASDF file. ofd, oasdf_text = open_and_check_asdf_header(oname) - oyaml_text = read_and_validate_yaml(ofd,oname) + oyaml_text = read_and_validate_yaml(ofd, oname) ofd.close() asdf_text = oasdf_text + oyaml_text - # Compare text sizes and maybe output. + # Compare text sizes and maybe output. # There are three cases: msg_delim = '*' * 70 if len(edited_text) == len(asdf_text): - with open(oname,"r+b") as fd: + with open(oname, "r+b") as fd: fd.write(edited_text) print(f"\n{msg_delim}") print(f"The edited text in '{fname}' was written to '{oname}'") print(f"{msg_delim}\n") elif len(edited_text) < len(asdf_text): - buffered_text, diff = buffer_edited_text(edited_text,asdf_text) - with open(oname,"r+b") as fd: + buffered_text, diff = buffer_edited_text(edited_text, asdf_text) + with open(oname, "r+b") as fd: fd.write(buffered_text) print(f"\n{msg_delim}") print(f"The edited text in '{fname}' was written to '{oname}'") print(f"Added a '\\n' and {diff} buffer of spaces between the YAML text and binary blocks.") print(f"{msg_delim}\n") else: - rewrite_asdf_file(edited_text,asdf_text,oname,fname) + rewrite_asdf_file(edited_text, asdf_text, oname, fname) return @@ -504,20 +484,11 @@ def edit(args): Parameters ---------- - args : The command line arguments. + args : The command line arguments. """ if args.edit: - return edit_func(args.fname,args.oname) + return edit_func(args.fname, args.oname) elif args.save: - return save_func(args.fname,args.oname) + return save_func(args.fname, args.oname) else: return print("Invalid arguments") - - - - - - - - - From b193d2bb7375403980c2a92a8e43810cb134a8a7 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Thu, 17 Sep 2020 10:53:41 -0400 Subject: [PATCH 19/47] Changing formatting of files and removing unnecessary comments. --- asdf/asdf.py | 4 ---- asdf/commands/edit.py | 39 ++++++++++++++++++++------------------- 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/asdf/asdf.py b/asdf/asdf.py index eeb6eb43d..3ab54383c 100644 --- a/asdf/asdf.py +++ b/asdf/asdf.py @@ -869,7 +869,6 @@ def _open_asdf(cls, self, fd, uri=None, mode='r', elif yaml_token != b'': raise IOError("ASDF file appears to contain garbage after header.") - # The variable tree gets overwritten mulitple times. Why? if tree is None: # At this point the tree should be tagged, but we want it to be # tagged with the core/asdf version appropriate to this file's @@ -877,16 +876,13 @@ def _open_asdf(cls, self, fd, uri=None, mode='r', # to select the correct tag for us. tree = yamlutil.custom_tree_to_tagged_tree(AsdfObject(), self) - # Function 6 Process binary blocks if has_blocks: self._blocks.read_internal_blocks( fd, past_magic=True, validate_checksums=validate_checksums) self._blocks.read_block_index(fd, self) - # Function 7 References tree = reference.find_references(tree, self) - # Function 8 Schemas if self.version <= versioning.FILL_DEFAULTS_MAX_VERSION and legacy_fill_schema_defaults: schema.fill_defaults(tree, self, reading=True) diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index 1b71cd661..7acfc9ce2 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -54,16 +54,18 @@ def setup_arguments(cls, subparsers): # The edit is either being performed or saved group = parser.add_mutually_exclusive_group(required=True) + group.add_argument( '-s', action='store_true', dest='save', - help="Saves a YAML text file to its ASDF file. Requires an ASDF input file.") + help="Saves a YAML text file to its ASDF file. Requires an YAML input file and ASDF output file.") + group.add_argument( '-e', action='store_true', dest='edit', - help="Create a YAML text file for a ASDF file. Requires a YAML input file.") + help="Create a YAML text file for a ASDF file. Requires a ASDF input file.") parser.set_defaults(func=cls.run) @@ -83,7 +85,7 @@ def is_yaml_file(fname): Parameters ---------- - fname : The input file name. + fname : The character string of the input file name. """ base, ext = os.path.splitext(fname) @@ -92,13 +94,13 @@ def is_yaml_file(fname): return True -def is_validate_path_and_ext(fname, wanted_ext=None): +def is_valid_path_and_ext(fname, wanted_ext=None): """ Validates the path exists and the extension is one wanted. Parameters ---------- - fname : The input file name. + fname : The character string of the input file name. wanted_ext : List of extensions to check. """ if not os.path.exists(fname): @@ -117,31 +119,31 @@ def is_validate_path_and_ext(fname, wanted_ext=None): return True -def is_validate_asdf_path(fname): +def is_valid_asdf_path(fname): """ Validates fname path exists and has extension '.asdf'. Parameters ---------- - fname : The input file name. + fname : The character string of the input file name. """ ext = ['.asdf'] - if is_validate_path_and_ext(fname, ext): + if is_valid_path_and_ext(fname, ext): return True print(f"Error: '{fname}' should have extension '{ext[0]}'") return False -def is_validate_yaml_path(fname): +def is_valid_yaml_path(fname): """ Validates fname path exists and has extension '.yaml'. Parameters ---------- - fname : The input file name. + fname : The character string of the input file name. """ ext = ['.yaml'] - if is_validate_path_and_ext(fname, ext): + if is_valid_path_and_ext(fname, ext): return True print(f"Error: '{fname}' should have extension '{ext[0]}'") return False @@ -162,7 +164,6 @@ def check_asdf_header(fd): print("Invalid ASDF ID") sys.exit(1) - # Maybe validate ASDF format version comment_section = fd.read_until( b'(%YAML)|(' + constants.BLOCK_MAGIC + b')', 5, "start of content", @@ -179,7 +180,7 @@ def open_and_check_asdf_header(fname): Parameters ---------- - fname : The input file name. + fname : The character string of the input file name. """ fullpath = os.path.abspath(fname) fd = generic_io.get_file(fullpath, mode="r") @@ -196,7 +197,7 @@ def read_and_validate_yaml(fd, fname): Parameters ---------- - fname : The input file name. + fname : The character string of the input file name. fd : GenericFile for fname. """ YAML_TOKEN = b'%YAML' @@ -232,10 +233,10 @@ def edit_func(fname, oname): Parameters ---------- - fname : The input ASDF file name. - oname : The output YAML file name. + fname : The character string of the input ASDF file name. + oname : The character string of the output YAML file name. """ - if not is_validate_asdf_path(fname): + if not is_valid_asdf_path(fname): return False # Validate input file is an ASDF file. @@ -437,10 +438,10 @@ def save_func(fname, oname): oname : The output ASDF file name. """ - if not is_validate_yaml_path(fname): + if not is_valid_yaml_path(fname): return False - if not is_validate_asdf_path(oname): + if not is_valid_asdf_path(oname): return False # Validate input file is an ASDF formatted YAML. From dd5672f41d897bf2adc2d728b79aa57202c08f3b Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Fri, 18 Sep 2020 09:01:51 -0400 Subject: [PATCH 20/47] Adding function comments. --- asdf/commands/edit.py | 50 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index 7acfc9ce2..17d5b9ddd 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -157,6 +157,10 @@ def check_asdf_header(fd): Parameters ---------- fd : GenericFile + + Return + ------ + The ASDF header line and the ASDF comment as bytes. """ header_line = fd.read_until(b'\r?\n', 2, "newline", include=True) @@ -181,6 +185,10 @@ def open_and_check_asdf_header(fname): Parameters ---------- fname : The character string of the input file name. + + Return + ------ + File descriptor for ASDF file and the ASDF header and ASDF comments as bytes. """ fullpath = os.path.abspath(fname) fd = generic_io.get_file(fullpath, mode="r") @@ -199,6 +207,10 @@ def read_and_validate_yaml(fd, fname): ---------- fname : The character string of the input file name. fd : GenericFile for fname. + + Return + ------ + The YAML portion of an ASDF file as bytes. """ YAML_TOKEN = b'%YAML' token = fd.read(len(YAML_TOKEN)) @@ -276,6 +288,15 @@ def buffer_edited_text(edited_text, orig_text): """ There is more text in the original ASDF file than in the edited text, so we will buffer the edited text with spaces. + + Parameters + ---------- + edited_text - The text from the edited YAML file + orig_text - The text from the original ASDF file + + Return + ------ + The buffered text and the number of spaces added as buffer. """ diff = len(orig_text) - len(edited_text) if diff < 1: @@ -302,6 +323,15 @@ def buffer_edited_text(edited_text, orig_text): def add_buffer_to_new_text(edited_text, buffer_size): """ Adds buffer to edited text. + + Parameters + ---------- + edited_text - The text from the edited YAML file. + buffer_size - The number of spaces to add as a buffer. + + Return + ------ + Buffered text with the number of spaces requested as buffer. """ wdelim = b'\r\n...\r\n' ldelim = b'\n...\n' @@ -322,6 +352,14 @@ def add_buffer_to_new_text(edited_text, buffer_size): def write_block_index(fd, index): + """ + Write the block index to an ASDF file. + + Parameters + ---------- + fd - The output file to write the block index. + index - A list of locations for each block. + """ if len(index) < 1: return @@ -335,6 +373,18 @@ def write_block_index(fd, index): return def get_next_block_header(fd): + """ + From a file, gets the next block header. + + Parameters + ---------- + fd - The ASDF file to get the next block. + + Return + ------ + If a block is found, return the bytes of the block header. + Otherwise return None. + """ # Block header structure: # 4 bytes of magic number # 2 bytes of header length, after the length field (min 48) From 89f8de87eb93526496c8e3b6c879a129de89d9a3 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Mon, 21 Sep 2020 12:58:51 -0400 Subject: [PATCH 21/47] Reverting to original asdf.py file. --- asdf/asdf.py | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/asdf/asdf.py b/asdf/asdf.py index 3ab54383c..50d160bc8 100644 --- a/asdf/asdf.py +++ b/asdf/asdf.py @@ -35,6 +35,7 @@ from .tags.core import AsdfObject, Software, HistoryEntry, ExtensionMetadata + def get_asdf_library_info(): """ Get information about asdf to include in the asdf_library entry @@ -739,7 +740,8 @@ def _parse_header_line(cls, line): try: version = versioning.AsdfVersion(parts[1].decode('ascii')) except ValueError: - raise ValueError("Unparseable version in ASDF file: {0}".format(parts[1])) + raise ValueError( + "Unparseable version in ASDF file: {0}".format(parts[1])) return version @@ -761,9 +763,6 @@ def _parse_comment_section(cls, content): @classmethod def _find_asdf_version_in_comments(cls, comments): - """ From the initial comments line in an ASDF file, capture the ASDF - version. - """ for comment in comments: parts = comment.split() if len(parts) == 2 and parts[0] == constants.ASDF_STANDARD_COMMENT: @@ -786,15 +785,12 @@ def _open_asdf(cls, self, fd, uri=None, mode='r', ignore_missing_extensions=False, **kwargs): """Attempt to populate AsdfFile data from file-like object""" - # Make sure arguments aren't contradictory + if strict_extension_check and ignore_missing_extensions: raise ValueError( "'strict_extension_check' and 'ignore_missing_extensions' are " "incompatible options") - # Set local variables - # TODO From here to self._mode = mode, can put in a function - # TODO validate_on_read, legacy_fill_schema_defaults = validate_and_schema(kwargs) if "validate_on_read" in kwargs: warnings.warn( "The 'validate_on_read' argument is deprecated, set " @@ -815,19 +811,16 @@ def _open_asdf(cls, self, fd, uri=None, mode='r', else: legacy_fill_schema_defaults = get_config().legacy_fill_schema_defaults - # Open the file self._mode = mode fd = generic_io.get_file(fd, mode=self._mode, uri=uri) self._fd = fd - - # Validate the ASDF header + # The filename is currently only used for tracing warning information self._fname = self._fd._uri if self._fd._uri else '' header_line = fd.read_until(b'\r?\n', 2, "newline", include=True) self._file_format_version = cls._parse_header_line(header_line) self.version = self._file_format_version - # Read the optional comments line(s) comment_section = fd.read_until( b'(%YAML)|(' + constants.BLOCK_MAGIC + b')', 5, "start of content", include=False, exception=False) @@ -843,8 +836,6 @@ def _open_asdf(cls, self, fd, uri=None, mode='r', if extensions: self.extensions = extensions - # Read and validate YAML text. - # It's possible there is no YAML, so the next token could be BLOCK_MAGIC yaml_token = fd.read(4) has_blocks = False tree = None @@ -888,7 +879,6 @@ def _open_asdf(cls, self, fd, uri=None, mode='r', if validate_on_read: try: - # TODO Validation will take some work to separate from the class. self._validate(tree, reading=True) except ValidationError: self.close() From 10dc02f41bab52b9f2806675e45757493cdb9013 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Mon, 21 Sep 2020 13:01:12 -0400 Subject: [PATCH 22/47] Reverting commands/__init__.py to original style. --- asdf/commands/__init__.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/asdf/commands/__init__.py b/asdf/commands/__init__.py index 3a084d63e..a2a819516 100644 --- a/asdf/commands/__init__.py +++ b/asdf/commands/__init__.py @@ -7,19 +7,11 @@ from .tags import list_tags from .extension import find_extensions from .info import info -from .edit import edit -__all__ = [ - 'defragment', - 'diff', - 'edit', - 'explode', - 'find_extensions', - 'implode', - 'info' - 'list_tags', - 'to_yaml', -] + +__all__ = [ 'implode', 'explode', 'to_yaml', 'defragment', 'diff', 'list_tags', + 'find_extensions', 'info', 'edit'] + # Extracting ASDF-in-FITS files requires Astropy if importlib.util.find_spec('astropy'): From 55f02b06be75e964b255b1d69155c0c84e820c97 Mon Sep 17 00:00:00 2001 From: Kenneth Macdonald Date: Fri, 14 Aug 2020 10:16:32 -0400 Subject: [PATCH 23/47] Adding basic edit command to asdftool. --- asdf/commands/__init__.py | 5 +- asdf/commands/edit.py | 545 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 548 insertions(+), 2 deletions(-) create mode 100644 asdf/commands/edit.py diff --git a/asdf/commands/__init__.py b/asdf/commands/__init__.py index 7e8e41cc8..3a45d8412 100644 --- a/asdf/commands/__init__.py +++ b/asdf/commands/__init__.py @@ -7,10 +7,11 @@ from .tags import list_tags from .extension import find_extensions from .info import info +from .edit import edit -__all__ = ['implode', 'explode', 'to_yaml', 'defragment', 'diff', 'list_tags', - 'find_extensions', 'info'] +__all__ = [ 'implode', 'explode', 'to_yaml', 'defragment', 'diff', 'list_tags', + 'find_extensions', 'info', 'edit'] # Extracting ASDF-in-FITS files requires Astropy diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py new file mode 100644 index 000000000..17d5b9ddd --- /dev/null +++ b/asdf/commands/edit.py @@ -0,0 +1,545 @@ +""" +Contains commands for lightweight text editing of an ASDF file. +Future work: Make this interactive editing. +""" + +import io +import os +import struct +import sys + +import asdf.constants as constants + +from .. import generic_io +from .. import schema +from .. import yamlutil + +from .main import Command + +__all__ = ['edit'] + +class Edit(Command): + @classmethod + def setup_arguments(cls, subparsers): + """ + Set up a command line argument parser for the edit subcommand. + """ + desc_string = "Allows for easy editing of the YAML in an ASDF file. " \ + "For edit mode, the YAML portion of an ASDF file is" \ + "separated from the ASDF into a text file for easy" \ + "editing. For save mode, the edited text file is written" \ + "to its ASDF file." + + # Set up the parser + parser = subparsers.add_parser( + str("edit"), + help="Edit YAML portion of an ASDF file.", + description=desc_string) + + # Need an input file + parser.add_argument( + '--infile', '-f', + type=str, + required=True, + dest='fname', + help="Input file (ASDF for -e option, YAML for -s option") + + # Need an output file + parser.add_argument( + '--outfile', '-o', + type=str, + required=True, + dest='oname', + help="Output file (YAML for -e option, ASDF for -s option") + + # The edit is either being performed or saved + group = parser.add_mutually_exclusive_group(required=True) + + group.add_argument( + '-s', + action='store_true', + dest='save', + help="Saves a YAML text file to its ASDF file. Requires an YAML input file and ASDF output file.") + + group.add_argument( + '-e', + action='store_true', + dest='edit', + help="Create a YAML text file for a ASDF file. Requires a ASDF input file.") + + parser.set_defaults(func=cls.run) + + return parser + + @classmethod + def run(cls, args): + """ + Execute the edit subcommand. + """ + return edit(args) + + +def is_yaml_file(fname): + """ + Determines if a file is a YAML file based only on the file extension. + + Parameters + ---------- + fname : The character string of the input file name. + """ + + base, ext = os.path.splitext(fname) + if '.yaml' != ext: + return False + return True + + +def is_valid_path_and_ext(fname, wanted_ext=None): + """ + Validates the path exists and the extension is one wanted. + + Parameters + ---------- + fname : The character string of the input file name. + wanted_ext : List of extensions to check. + """ + if not os.path.exists(fname): + print(f"Error: No file '{fname}' exists.") + return False + + # Simply validates the path existence + if wanted_ext is None: + return True + + # Make sure the extension is one desired. + base, ext = os.path.splitext(fname) + if ext not in wanted_ext: + return False + + return True + + +def is_valid_asdf_path(fname): + """ + Validates fname path exists and has extension '.asdf'. + + Parameters + ---------- + fname : The character string of the input file name. + """ + ext = ['.asdf'] + if is_valid_path_and_ext(fname, ext): + return True + print(f"Error: '{fname}' should have extension '{ext[0]}'") + return False + + +def is_valid_yaml_path(fname): + """ + Validates fname path exists and has extension '.yaml'. + + Parameters + ---------- + fname : The character string of the input file name. + """ + ext = ['.yaml'] + if is_valid_path_and_ext(fname, ext): + return True + print(f"Error: '{fname}' should have extension '{ext[0]}'") + return False + + +def check_asdf_header(fd): + """ + Makes sure the header line is the expected one, as well + as getting the optional comment line. + + Parameters + ---------- + fd : GenericFile + + Return + ------ + The ASDF header line and the ASDF comment as bytes. + """ + + header_line = fd.read_until(b'\r?\n', 2, "newline", include=True) + if not header_line.startswith(constants.ASDF_MAGIC): + print("Invalid ASDF ID") + sys.exit(1) + + comment_section = fd.read_until( b'(%YAML)|(' + constants.BLOCK_MAGIC + b')', + 5, + "start of content", + include=False, + exception=False) + + return header_line + comment_section + + +def open_and_check_asdf_header(fname): + """ + Open and validate the ASDF file, as well as read in all the YAML + that will be outputted to a YAML file. + + Parameters + ---------- + fname : The character string of the input file name. + + Return + ------ + File descriptor for ASDF file and the ASDF header and ASDF comments as bytes. + """ + fullpath = os.path.abspath(fname) + fd = generic_io.get_file(fullpath, mode="r") + + # Read the ASDF header and optional comments section + header_and_comment = check_asdf_header(fd) + + return fd, header_and_comment # Return GenericFile and ASDF header bytes. + + +def read_and_validate_yaml(fd, fname): + """ + Get the YAML text from an ASDF formatted file. + + Parameters + ---------- + fname : The character string of the input file name. + fd : GenericFile for fname. + + Return + ------ + The YAML portion of an ASDF file as bytes. + """ + YAML_TOKEN = b'%YAML' + token = fd.read(len(YAML_TOKEN)) + if token != YAML_TOKEN: + print(f"Error: No YAML in '{fname}'") + sys.exit(0) + + # Get YAML reader and content + reader = fd.reader_until(constants.YAML_END_MARKER_REGEX, + 7, + 'End of YAML marker', + include=True, + initial_content=token) + yaml_content = reader.read() + + # Create a YAML tree to validate + # The YAML text must be converted to a stream. + tree = yamlutil.load_tree(io.BytesIO(yaml_content)) + if tree is None: + print("Error: 'yamlutil.load_tree' failed to return a tree.") + sys.exist(1) + + schema.validate(tree, None) # Failure raises an exception. + + return yaml_content + +def edit_func(fname, oname): + """ + Creates a YAML file from an ASDF file. The YAML file will contain only the + YAML from the ASDF file. The YAML text will be written to a YAML text file + in the same, so from 'example.asdf' the file 'example.yaml' will be created. + + Parameters + ---------- + fname : The character string of the input ASDF file name. + oname : The character string of the output YAML file name. + """ + if not is_valid_asdf_path(fname): + return False + + # Validate input file is an ASDF file. + fd, asdf_text = open_and_check_asdf_header(fname) + + # Read and validate the YAML of an ASDF file. + yaml_text = read_and_validate_yaml(fd, fname) + + # Open a YAML file for the ASDF YAML. + if not is_yaml_file(oname): + sys.exit(1) + + # Write the YAML for the original ASDF file. + with open(oname, "wb") as ofd: + ofd.write(asdf_text) + ofd.write(yaml_text) + + # Output message to user. + delim = '*' * 70 + print(f"\n{delim}") + print("ASDF formatting and YAML schema validated.") + print(f"The text portion of '{fname}' is written to:") + print(f" '{oname}'") + print(f"The file '{oname}' can be edited using your favorite text editor.") + print("The edited text can then be saved to the ASDF file of your choice") + print("using 'asdftool edit -s -f -o .") + print('-' * 70) + print("Note: This is meant to be a lightweight text editing tool of") + print(" ASDF .If the edited text is larger than the YAML portion") + print(" of the ASDF file to be written to, the edits may not be") + print(" able to saved.") + print(f"{delim}\n") + + return + +def buffer_edited_text(edited_text, orig_text): + """ + There is more text in the original ASDF file than in the edited text, + so we will buffer the edited text with spaces. + + Parameters + ---------- + edited_text - The text from the edited YAML file + orig_text - The text from the original ASDF file + + Return + ------ + The buffered text and the number of spaces added as buffer. + """ + diff = len(orig_text) - len(edited_text) + if diff < 1: + print("Error: shouldn't be here.") + sys.exit(1) + + wdelim = b'\r\n...\r\n' + ldelim = b'\n...\n' + if edited_text[-len(wdelim) :]==wdelim: + delim = wdelim + elif edited_text[-len(ldelim) :]==ldelim: + delim = ldelim + else: + print("Unrecognized YAML delimiter ending the YAML text.") + print(f"It should be {wdelim} or {ldelim}, but the") + print(f"last {len(wdelim)} bytes are {edited_text[-len(wdelim):]}.") + sys.exit(1) + + # May not be correct. If on Windows use '\r\n'. + buffered_text = edited_text[: -len(delim)] + b'\n' + b' '*(diff - 1) + delim + return buffered_text, diff - 1 + + +def add_buffer_to_new_text(edited_text, buffer_size): + """ + Adds buffer to edited text. + + Parameters + ---------- + edited_text - The text from the edited YAML file. + buffer_size - The number of spaces to add as a buffer. + + Return + ------ + Buffered text with the number of spaces requested as buffer. + """ + wdelim = b'\r\n...\r\n' + ldelim = b'\n...\n' + if edited_text[-len(wdelim) :]==wdelim: + delim = wdelim + elif edited_text[-len(ldelim) :]==ldelim: + delim = ldelim + else: + print("Unrecognized YAML delimiter ending the YAML text.") + print(f"It should be {wdelim} or {ldelim}, but the") + print(f"last {len(wdelim)} bytes are {edited_text[-len(wdelim):]}.") + sys.exit(1) + + buf = b' ' * buffer_size + buffered_text = edited_text[: -len(delim)] + b'\n' + buf + delim + + return buffered_text + + +def write_block_index(fd, index): + """ + Write the block index to an ASDF file. + + Parameters + ---------- + fd - The output file to write the block index. + index - A list of locations for each block. + """ + if len(index) < 1: + return + + bindex_hdr = b"#ASDF BLOCK INDEX\n%YAML 1.1\n---\n" + fd.write(bindex_hdr) + for idx in index: + ostr = f'- {idx}\n' + fd.write(ostr.encode('utf-8')) + end = b'...' + fd.write(end) + return + +def get_next_block_header(fd): + """ + From a file, gets the next block header. + + Parameters + ---------- + fd - The ASDF file to get the next block. + + Return + ------ + If a block is found, return the bytes of the block header. + Otherwise return None. + """ + # Block header structure: + # 4 bytes of magic number + # 2 bytes of header length, after the length field (min 48) + # 4 bytes flag + # 4 bytes compression + # 8 bytes allocated size + # 8 bytes used (on disk) size + # 8 bytes data size + # 16 bytes checksum + blk_header = fd.read(6) + if len(blk_header) != 6: + return None + if not blk_header.startswith(constants.BLOCK_MAGIC): + return None + hsz = struct.unpack(">H", blk_header[4:6])[0] + header = fd.read(hsz) + return blk_header + header + +def rewrite_asdf_file(edited_text, orig_text, oname, fname): + """ + Rewrite an ASDF file for too large edited YAML. The edited YAML, a buffer, + the blocks will be rewritten. A block index will also be rewritten. If a + block index existed in the old file, it will have to be recomputed to + because of the larger YAML size and buffer, which changes the location of + the binary blocks. + + Parameters + ---------- + edited_text : the new YAML text to write out. + orig_text : the original YAML text to overwrite. + oname : the ASDF file to overwrite. + fname : the edit YAML to write to new file. + """ + + tmp_oname = oname + '.tmp' # Save as a temp file, in case anything goes wrong. + buffer_size = 10 * 1000 + buffered_text = add_buffer_to_new_text(edited_text, buffer_size) + + ifd = open(oname, "r+b") # Open old ASDF to get binary blocks + ifd.seek(len(orig_text)) + + ofd = open(tmp_oname, "w+b") # Open temp file to write + ofd.write(buffered_text) # Write edited YAML + + current_location = len(buffered_text) + block_index = [] + alloc_loc = 14 # 4 bytes of block ID, 2 blocks of size, 8 blocks into header + block_chunk = 2048 + while True: + next_block = get_next_block_header(ifd) + if next_block is None: + break + + # Get block size on disk + alloc = struct.unpack(">Q", next_block[alloc_loc:alloc_loc+8])[0] + + # Save block location for block index + block_index.append(current_location) + current_location = current_location + len(next_block) + alloc + + # Copy block + ofd.write(next_block) + while alloc >= block_chunk: + chunk = ifd.read(block_chunk) + ofd.write(chunk) + alloc -= block_chunk + if alloc>0: + chunk = ifd.read(alloc) + ofd.write(chunk) + + + write_block_index(ofd, block_index) + + # Rename temp file. + os.rename(tmp_oname, oname) + + # Output message to user. + delim = '*' * 70 + print(f"\n{delim}") + print(f"The text in '{fname}' was too large to simply overwrite the") + print(f"text in '{oname}'. The file '{oname}' was rewritten to") + print(f"accommodate the larger text size.") + print(f"Also, added a '\\n' and {buffer_size:,} spaces as a buffer for") + print(f"the text in '{oname}' to allow for future edits.") + print(f"{delim}\n") + +def save_func(fname, oname): + """ + Checks to makes sure a corresponding ASDF file exists. This is done by + seeing if a file of the same name with '.asdf' as an extension exists. + Checks to makes sure fname is a valid YAML file. + If the YAML text is smaller than the YAML text in the ASDF file + overwrite the YAML in the ASDF file. + If the YAML text is smaller than the YAML text in the ASDF file + If the file is small, then rewrite file. + If the file is large, ask if rewrite is desired. + + Parameters + ---------- + fname : The input YAML file. + oname : The output ASDF file name. + """ + + if not is_valid_yaml_path(fname): + return False + + if not is_valid_asdf_path(oname): + return False + + # Validate input file is an ASDF formatted YAML. + ifd, iasdf_text = open_and_check_asdf_header(fname) + iyaml_text = read_and_validate_yaml(ifd, fname) + ifd.close() + edited_text = iasdf_text + iyaml_text + + # Get text from ASDF file. + ofd, oasdf_text = open_and_check_asdf_header(oname) + oyaml_text = read_and_validate_yaml(ofd, oname) + ofd.close() + asdf_text = oasdf_text + oyaml_text + + # Compare text sizes and maybe output. + # There are three cases: + msg_delim = '*' * 70 + if len(edited_text) == len(asdf_text): + with open(oname, "r+b") as fd: + fd.write(edited_text) + print(f"\n{msg_delim}") + print(f"The edited text in '{fname}' was written to '{oname}'") + print(f"{msg_delim}\n") + elif len(edited_text) < len(asdf_text): + buffered_text, diff = buffer_edited_text(edited_text, asdf_text) + with open(oname, "r+b") as fd: + fd.write(buffered_text) + print(f"\n{msg_delim}") + print(f"The edited text in '{fname}' was written to '{oname}'") + print(f"Added a '\\n' and {diff} buffer of spaces between the YAML text and binary blocks.") + print(f"{msg_delim}\n") + else: + rewrite_asdf_file(edited_text, asdf_text, oname, fname) + + return + +def edit(args): + """ + Implode a given ASDF file, which may reference external data, back + into a single ASDF file. + + Parameters + ---------- + args : The command line arguments. + """ + if args.edit: + return edit_func(args.fname, args.oname) + elif args.save: + return save_func(args.fname, args.oname) + else: + return print("Invalid arguments") From 24b276a66711c58497b994f40c5146e1edaa3eb2 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Tue, 22 Sep 2020 11:43:45 -0400 Subject: [PATCH 24/47] Adding pytest for the edit subcommand for asdftool. --- asdf/commands/tests/test_edit.py | 178 +++++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) create mode 100644 asdf/commands/tests/test_edit.py diff --git a/asdf/commands/tests/test_edit.py b/asdf/commands/tests/test_edit.py new file mode 100644 index 000000000..4d1e01b7b --- /dev/null +++ b/asdf/commands/tests/test_edit.py @@ -0,0 +1,178 @@ +import os +import shutil + +import numpy as np + +import asdf +from asdf import AsdfFile +from asdf.commands import main +from ...tests.helpers import get_file_sizes, assert_tree_match + + +""" +Three tests are defined. + +1. Run the command 'asdftool edit -e' to create a YAML file, simulating + the steps a user would make to start editing an ASDF file. +2. Run the command 'asdftool edit -s' to save YAML edits such that the + edited YAML will have the same or fewer characters as the original + ASDF file, so will be overwritten in place with the space character + used as any buffer, to consume all the memory on disk the YAML takes + up in the ASDF file. +2. Run the command 'asdftool edit -s' to save YAML edits such that the + edited YAML will have the more characters than the original ASDF file. + This triggers a rewrite of the file, since there isn't enough 'room' + on disk to accomadate the edited YAML. The resultant YAML will be + rewritten with a buffer (using the space character as buffer) to + accomodate future edits. If a block index existed in the original ASDF + file, it will need to be recomputed and if one didn't exist, it will be + added to the resultant ASDF file. +""" + + +def create_base_asdf(tmpdir): + """ + In the test temp directory, create a base ASDF file to edit + and test against. + """ + seq = np.arange(100) + + # Store the data in an arbitrarily nested dictionary + tree = { + "foo": 42, + "name": "Monty", + "sequence": seq, + } + + fname = "test_edit_base.asdf" + oname = os.path.join(tmpdir, fname) + if os.path.exists(oname): + os.remove(oname) + af = asdf.AsdfFile(tree) + af.write_to(oname) + + return oname + + +def create_edit_equal(base_yaml): + """ + The YAML from the base ASDF file will have a 'foo' value of 42. Create + an edited YAML file with this value being 41. This will create an edited + YAML file with the same number of characters in the YAML section as was in + the original ASDF file. + """ + with open(base_yaml, "r") as fd: + lines = fd.readlines() + + base, ext = os.path.splitext(base_yaml) + oname = f"{base}_edit_equal.yaml" + if os.path.exists(oname): + os.remove(oname) + with open(oname, "w") as fd: + for l in lines: + if "foo" in l: + print("foo: 41", file=fd) # Change a value + else: + fd.write(l) + + return oname + + +added_line = "bar: 13" + + +def create_edit_larger(base_yaml): + """ + The YAML from the base ASDF file will have a 'foo' value. After this + line, add another line. This will create an edited YAML file that will + have more characters than the YAML portion of the original ASDF file. + """ + with open(base_yaml, "r") as fd: + lines = fd.readlines() + + base, ext = os.path.splitext(base_yaml) + oname = f"{base}_edit_larger.yaml" + if os.path.exists(oname): + os.remove(oname) + with open(oname, "w") as fd: + for l in lines: + fd.write(l) + if "foo" in l: + print(f"{added_line}", file=fd) # Add a line + + return oname + + +def copy_base_asdf_equal(base_asdf): + """ + Create an ASDF file from the base ASDF file to test the editing of the + YAML portion with equal number of YAML characters. + """ + base, ext = os.path.splitext(base_asdf) + oname = f"{base}_equal.asdf" + if os.path.exists(oname): + os.remove(oname) + shutil.copyfile(base_asdf, oname) + + return oname + + +def copy_base_asdf_larger(base_asdf): + """ + Create an ASDF file from the base ASDF file to test the editing of the + YAML portion with a larger number of YAML characters. + """ + base, ext = os.path.splitext(base_asdf) + oname = f"{base}_larger.asdf" + if os.path.exists(oname): + os.remove(oname) + shutil.copyfile(base_asdf, oname) + + return oname + + +def test_edits(tmpdir): + # Test: + # Create base ASDF file for testing + tmpdir = "/Users/kmacdonald/tmp" + asdf_base = create_base_asdf(tmpdir) + + # Create base YAML file from base ASDF file + base, ext = os.path.splitext(asdf_base) + yaml_base = f"{base}.yaml" + # Run: asdftool edit -e -f {asdf_base} -o {yaml_base} + args = ["edit", "-e", "-f", f"{asdf_base}", "-o", f"{yaml_base}"] + main.main_from_args(args) + + + # Create ASDF file to edit with equal sized YAML + asdf_equal = copy_base_asdf_equal(asdf_base) + + # Create edited YAML file with equal number of characters + yaml_equal = create_edit_equal(yaml_base) + + # Save edits to ASDF files + # Run: asdftool edit -s -f {yaml_equal} -o {asdf_equal} + args = ["edit", "-s", "-f", f"{yaml_equal}", "-o", f"{asdf_equal}"] + print(f"args = {args}") + main.main_from_args(args) + + af_equal = asdf.open(asdf_equal) + assert af_equal.tree["foo"] == 41 + assert os.path.getsize(asdf_equal) == os.path.getsize(asdf_base) + + + + # Create ASDF file to edit with larger sized YAML + asdf_larger = copy_base_asdf_larger(asdf_base) + + # Create edited YAML file with larger number of characters + yaml_larger = create_edit_larger(yaml_base) + + # Run: asdftool edit -s -f {yaml_larger} -o {asdf_larger} + args = ["edit", "-s", "-f", f"{yaml_larger}", "-o", f"{asdf_larger}"] + main.main_from_args(args) + + af_larger = asdf.open(asdf_larger) + assert "bar" in af_larger.tree + assert os.path.getsize(asdf_larger) - os.path.getsize(asdf_base) > 10000 From 94e1be895dae5d01a3d0a5b0e045097de5472f71 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Tue, 22 Sep 2020 12:22:54 -0400 Subject: [PATCH 25/47] Adding coverage for a smaller YAML edited file. --- asdf/commands/tests/test_edit.py | 59 +++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 5 deletions(-) diff --git a/asdf/commands/tests/test_edit.py b/asdf/commands/tests/test_edit.py index 4d1e01b7b..2445a017b 100644 --- a/asdf/commands/tests/test_edit.py +++ b/asdf/commands/tests/test_edit.py @@ -78,7 +78,28 @@ def create_edit_equal(base_yaml): return oname -added_line = "bar: 13" +def create_edit_smaller(base_yaml): + """ + The YAML from the base ASDF file will have a 'foo' value of 42. Create + an edited YAML file with this value being 41. This will create an edited + YAML file with the same number of characters in the YAML section as was in + the original ASDF file. + """ + with open(base_yaml, "r") as fd: + lines = fd.readlines() + + base, ext = os.path.splitext(base_yaml) + oname = f"{base}_edit_smaller.yaml" + if os.path.exists(oname): + os.remove(oname) + with open(oname, "w") as fd: + for l in lines: + if "foo" in l: + print("foo: 2", file=fd) # Change a value + else: + fd.write(l) + + return oname def create_edit_larger(base_yaml): @@ -98,7 +119,7 @@ def create_edit_larger(base_yaml): for l in lines: fd.write(l) if "foo" in l: - print(f"{added_line}", file=fd) # Add a line + print("bar: 13", file=fd) # Add a line return oname @@ -117,6 +138,20 @@ def copy_base_asdf_equal(base_asdf): return oname +def copy_base_asdf_smaller(base_asdf): + """ + Create an ASDF file from the base ASDF file to test the editing of the + YAML portion with equal number of YAML characters. + """ + base, ext = os.path.splitext(base_asdf) + oname = f"{base}_smaller.asdf" + if os.path.exists(oname): + os.remove(oname) + shutil.copyfile(base_asdf, oname) + + return oname + + def copy_base_asdf_larger(base_asdf): """ Create an ASDF file from the base ASDF file to test the editing of the @@ -144,7 +179,22 @@ def test_edits(tmpdir): args = ["edit", "-e", "-f", f"{asdf_base}", "-o", f"{yaml_base}"] main.main_from_args(args) + # Test smaller + # Create ASDF file to edit with larger sized YAML + asdf_smaller = copy_base_asdf_smaller(asdf_base) + + # Create edited YAML file with larger number of characters + yaml_smaller = create_edit_smaller(yaml_base) + + # Run: asdftool edit -s -f {yaml_larger} -o {asdf_larger} + args = ["edit", "-s", "-f", f"{yaml_smaller}", "-o", f"{asdf_smaller}"] + main.main_from_args(args) + + af_smaller = asdf.open(asdf_smaller) + assert af_smaller.tree["foo"] == 2 + assert os.path.getsize(asdf_smaller) == os.path.getsize(asdf_base) + # Test equal # Create ASDF file to edit with equal sized YAML asdf_equal = copy_base_asdf_equal(asdf_base) @@ -159,10 +209,9 @@ def test_edits(tmpdir): af_equal = asdf.open(asdf_equal) assert af_equal.tree["foo"] == 41 - assert os.path.getsize(asdf_equal) == os.path.getsize(asdf_base) - - + assert os.path.getsize(asdf_equal) == os.path.getsize(asdf_base) + # Test larger # Create ASDF file to edit with larger sized YAML asdf_larger = copy_base_asdf_larger(asdf_base) From d0f1aa83df800c2e62c589b29ebcb8c64c5841a8 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Tue, 22 Sep 2020 12:45:01 -0400 Subject: [PATCH 26/47] Adding parametrization the edit test to test all supported versions. --- asdf/commands/tests/test_edit.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/asdf/commands/tests/test_edit.py b/asdf/commands/tests/test_edit.py index 2445a017b..dead1b60e 100644 --- a/asdf/commands/tests/test_edit.py +++ b/asdf/commands/tests/test_edit.py @@ -2,6 +2,7 @@ import shutil import numpy as np +import pytest import asdf from asdf import AsdfFile @@ -9,6 +10,7 @@ from ...tests.helpers import get_file_sizes, assert_tree_match + """ Three tests are defined. @@ -30,7 +32,8 @@ """ -def create_base_asdf(tmpdir): + +def create_base_asdf(tmpdir, version): """ In the test temp directory, create a base ASDF file to edit and test against. @@ -48,7 +51,7 @@ def create_base_asdf(tmpdir): oname = os.path.join(tmpdir, fname) if os.path.exists(oname): os.remove(oname) - af = asdf.AsdfFile(tree) + af = asdf.AsdfFile(tree,version=version) af.write_to(oname) return oname @@ -166,11 +169,12 @@ def copy_base_asdf_larger(base_asdf): return oname -def test_edits(tmpdir): +@pytest.mark.parametrize("version", asdf.versioning.supported_versions) +def test_edits(tmpdir,version): # Test: # Create base ASDF file for testing tmpdir = "/Users/kmacdonald/tmp" - asdf_base = create_base_asdf(tmpdir) + asdf_base = create_base_asdf(tmpdir, version) # Create base YAML file from base ASDF file base, ext = os.path.splitext(asdf_base) From 0fbf98bf5c1b34e8e78df646d8880d72fdfd1952 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Tue, 22 Sep 2020 15:42:23 -0400 Subject: [PATCH 27/47] Removing hardcoded path used for local testing. --- asdf/commands/tests/test_edit.py | 1 - 1 file changed, 1 deletion(-) diff --git a/asdf/commands/tests/test_edit.py b/asdf/commands/tests/test_edit.py index dead1b60e..65a87d22b 100644 --- a/asdf/commands/tests/test_edit.py +++ b/asdf/commands/tests/test_edit.py @@ -173,7 +173,6 @@ def copy_base_asdf_larger(base_asdf): def test_edits(tmpdir,version): # Test: # Create base ASDF file for testing - tmpdir = "/Users/kmacdonald/tmp" asdf_base = create_base_asdf(tmpdir, version) # Create base YAML file from base ASDF file From 5ecb3b1c617f568ff4173d79f7f7155f5601d330 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Tue, 22 Sep 2020 17:17:46 -0400 Subject: [PATCH 28/47] Refactored the automated tester for the edit subcommand of asdftool. --- asdf/commands/tests/test_edit.py | 231 +++++++------------------------ 1 file changed, 48 insertions(+), 183 deletions(-) diff --git a/asdf/commands/tests/test_edit.py b/asdf/commands/tests/test_edit.py index 65a87d22b..8d85058d6 100644 --- a/asdf/commands/tests/test_edit.py +++ b/asdf/commands/tests/test_edit.py @@ -1,4 +1,5 @@ import os +import re import shutil import numpy as np @@ -10,30 +11,7 @@ from ...tests.helpers import get_file_sizes, assert_tree_match - -""" -Three tests are defined. - -1. Run the command 'asdftool edit -e' to create a YAML file, simulating - the steps a user would make to start editing an ASDF file. -2. Run the command 'asdftool edit -s' to save YAML edits such that the - edited YAML will have the same or fewer characters as the original - ASDF file, so will be overwritten in place with the space character - used as any buffer, to consume all the memory on disk the YAML takes - up in the ASDF file. -2. Run the command 'asdftool edit -s' to save YAML edits such that the - edited YAML will have the more characters than the original ASDF file. - This triggers a rewrite of the file, since there isn't enough 'room' - on disk to accomadate the edited YAML. The resultant YAML will be - rewritten with a buffer (using the space character as buffer) to - accomodate future edits. If a block index existed in the original ASDF - file, it will need to be recomputed and if one didn't exist, it will be - added to the resultant ASDF file. -""" - - - -def create_base_asdf(tmpdir, version): +def _create_base_asdf(version, oname): """ In the test temp directory, create a base ASDF file to edit and test against. @@ -47,184 +25,71 @@ def create_base_asdf(tmpdir, version): "sequence": seq, } - fname = "test_edit_base.asdf" - oname = os.path.join(tmpdir, fname) - if os.path.exists(oname): - os.remove(oname) - af = asdf.AsdfFile(tree,version=version) + af = asdf.AsdfFile(tree, version=version) af.write_to(oname) - return oname - - -def create_edit_equal(base_yaml): - """ - The YAML from the base ASDF file will have a 'foo' value of 42. Create - an edited YAML file with this value being 41. This will create an edited - YAML file with the same number of characters in the YAML section as was in - the original ASDF file. - """ - with open(base_yaml, "r") as fd: - lines = fd.readlines() - - base, ext = os.path.splitext(base_yaml) - oname = f"{base}_edit_equal.yaml" - if os.path.exists(oname): - os.remove(oname) - with open(oname, "w") as fd: - for l in lines: - if "foo" in l: - print("foo: 41", file=fd) # Change a value - else: - fd.write(l) - - return oname - - -def create_edit_smaller(base_yaml): - """ - The YAML from the base ASDF file will have a 'foo' value of 42. Create - an edited YAML file with this value being 41. This will create an edited - YAML file with the same number of characters in the YAML section as was in - the original ASDF file. - """ - with open(base_yaml, "r") as fd: - lines = fd.readlines() - - base, ext = os.path.splitext(base_yaml) - oname = f"{base}_edit_smaller.yaml" - if os.path.exists(oname): - os.remove(oname) - with open(oname, "w") as fd: - for l in lines: - if "foo" in l: - print("foo: 2", file=fd) # Change a value - else: - fd.write(l) - - return oname - - -def create_edit_larger(base_yaml): - """ - The YAML from the base ASDF file will have a 'foo' value. After this - line, add another line. This will create an edited YAML file that will - have more characters than the YAML portion of the original ASDF file. - """ - with open(base_yaml, "r") as fd: - lines = fd.readlines() - - base, ext = os.path.splitext(base_yaml) - oname = f"{base}_edit_larger.yaml" - if os.path.exists(oname): - os.remove(oname) - with open(oname, "w") as fd: - for l in lines: - fd.write(l) - if "foo" in l: - print("bar: 13", file=fd) # Add a line - - return oname +def _create_edited_yaml(base_yaml, edited_yaml, pattern, replacement): + with open(base_yaml) as fd: + content = fd.read() + new_content = re.sub(pattern, replacement, content) + with open(edited_yaml, "w") as fd: + fd.write(new_content) -def copy_base_asdf_equal(base_asdf): - """ - Create an ASDF file from the base ASDF file to test the editing of the - YAML portion with equal number of YAML characters. - """ - base, ext = os.path.splitext(base_asdf) - oname = f"{base}_equal.asdf" - if os.path.exists(oname): - os.remove(oname) - shutil.copyfile(base_asdf, oname) - return oname +def _initialize_test(tmpdir, version): + asdf_base = os.path.join(tmpdir, "base.asdf") + yaml_base = os.path.join(tmpdir, "base.yaml") + asdf_edit = os.path.join(tmpdir, "edit.asdf") + yaml_edit = os.path.join(tmpdir, "edit.yaml") + _create_base_asdf(version,asdf_base) + shutil.copyfile(asdf_base, asdf_edit) -def copy_base_asdf_smaller(base_asdf): - """ - Create an ASDF file from the base ASDF file to test the editing of the - YAML portion with equal number of YAML characters. - """ - base, ext = os.path.splitext(base_asdf) - oname = f"{base}_smaller.asdf" - if os.path.exists(oname): - os.remove(oname) - shutil.copyfile(base_asdf, oname) - - return oname - - -def copy_base_asdf_larger(base_asdf): - """ - Create an ASDF file from the base ASDF file to test the editing of the - YAML portion with a larger number of YAML characters. - """ - base, ext = os.path.splitext(base_asdf) - oname = f"{base}_larger.asdf" - if os.path.exists(oname): - os.remove(oname) - shutil.copyfile(base_asdf, oname) + args = ["edit", "-e", "-f", f"{asdf_base}", "-o", f"{yaml_base}"] + main.main_from_args(args) - return oname + return asdf_base, yaml_base, asdf_edit, yaml_edit @pytest.mark.parametrize("version", asdf.versioning.supported_versions) -def test_edits(tmpdir,version): - # Test: - # Create base ASDF file for testing - asdf_base = create_base_asdf(tmpdir, version) - - # Create base YAML file from base ASDF file - base, ext = os.path.splitext(asdf_base) - yaml_base = f"{base}.yaml" - # Run: asdftool edit -e -f {asdf_base} -o {yaml_base} - args = ["edit", "-e", "-f", f"{asdf_base}", "-o", f"{yaml_base}"] - main.main_from_args(args) - - # Test smaller - # Create ASDF file to edit with larger sized YAML - asdf_smaller = copy_base_asdf_smaller(asdf_base) +def test_edit_smaller(tmpdir, version): + asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test(tmpdir, version) - # Create edited YAML file with larger number of characters - yaml_smaller = create_edit_smaller(yaml_base) + _create_edited_yaml(yaml_base, yaml_edit, "foo: 42", "foo: 2") - # Run: asdftool edit -s -f {yaml_larger} -o {asdf_larger} - args = ["edit", "-s", "-f", f"{yaml_smaller}", "-o", f"{asdf_smaller}"] - main.main_from_args(args) + args = ["edit", "-s", "-f", f"{yaml_edit}", "-o", f"{asdf_edit}"] + ret = main.main_from_args(args) + assert 0==ret - af_smaller = asdf.open(asdf_smaller) - assert af_smaller.tree["foo"] == 2 - assert os.path.getsize(asdf_smaller) == os.path.getsize(asdf_base) + with asdf.open(asdf_edit) as af: + assert af.tree["foo"] == 2 + assert os.path.getsize(asdf_edit) == os.path.getsize(asdf_base) - # Test equal - # Create ASDF file to edit with equal sized YAML - asdf_equal = copy_base_asdf_equal(asdf_base) +@pytest.mark.parametrize("version", asdf.versioning.supported_versions) +def test_edit_equal(tmpdir, version): + asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test(tmpdir, version) - # Create edited YAML file with equal number of characters - yaml_equal = create_edit_equal(yaml_base) + _create_edited_yaml(yaml_base, yaml_edit, "foo: 42", "foo: 41") - # Save edits to ASDF files - # Run: asdftool edit -s -f {yaml_equal} -o {asdf_equal} - args = ["edit", "-s", "-f", f"{yaml_equal}", "-o", f"{asdf_equal}"] - print(f"args = {args}") - main.main_from_args(args) + args = ["edit", "-s", "-f", f"{yaml_edit}", "-o", f"{asdf_edit}"] + ret = main.main_from_args(args) + assert 0==ret - af_equal = asdf.open(asdf_equal) - assert af_equal.tree["foo"] == 41 - assert os.path.getsize(asdf_equal) == os.path.getsize(asdf_base) + with asdf.open(asdf_edit) as af: + assert af.tree["foo"] == 41 + assert os.path.getsize(asdf_edit) == os.path.getsize(asdf_base) - # Test larger - # Create ASDF file to edit with larger sized YAML - asdf_larger = copy_base_asdf_larger(asdf_base) +@pytest.mark.parametrize("version", asdf.versioning.supported_versions) +def test_edit_larger(tmpdir, version): + asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test(tmpdir, version) - # Create edited YAML file with larger number of characters - yaml_larger = create_edit_larger(yaml_base) + _create_edited_yaml(yaml_base, yaml_edit, "foo: 42", "foo: 41\nbar: 13") - # Run: asdftool edit -s -f {yaml_larger} -o {asdf_larger} - args = ["edit", "-s", "-f", f"{yaml_larger}", "-o", f"{asdf_larger}"] - main.main_from_args(args) + args = ["edit", "-s", "-f", f"{yaml_edit}", "-o", f"{asdf_edit}"] + ret = main.main_from_args(args) + assert 0==ret - af_larger = asdf.open(asdf_larger) - assert "bar" in af_larger.tree - assert os.path.getsize(asdf_larger) - os.path.getsize(asdf_base) > 10000 + with asdf.open(asdf_edit) as af: + assert "bar" in af.tree + assert os.path.getsize(asdf_edit) - os.path.getsize(asdf_base) > 10000 From 6f0d543cc915445ec1c5536e18d9d1d10f51fec2 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Tue, 22 Sep 2020 17:24:57 -0400 Subject: [PATCH 29/47] Reformatting the tests and changing some values in the tree. --- asdf/commands/tests/test_edit.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/asdf/commands/tests/test_edit.py b/asdf/commands/tests/test_edit.py index 8d85058d6..902f60a30 100644 --- a/asdf/commands/tests/test_edit.py +++ b/asdf/commands/tests/test_edit.py @@ -61,10 +61,10 @@ def test_edit_smaller(tmpdir, version): args = ["edit", "-s", "-f", f"{yaml_edit}", "-o", f"{asdf_edit}"] ret = main.main_from_args(args) assert 0==ret + assert os.path.getsize(asdf_edit) == os.path.getsize(asdf_base) with asdf.open(asdf_edit) as af: assert af.tree["foo"] == 2 - assert os.path.getsize(asdf_edit) == os.path.getsize(asdf_base) @pytest.mark.parametrize("version", asdf.versioning.supported_versions) def test_edit_equal(tmpdir, version): @@ -75,21 +75,21 @@ def test_edit_equal(tmpdir, version): args = ["edit", "-s", "-f", f"{yaml_edit}", "-o", f"{asdf_edit}"] ret = main.main_from_args(args) assert 0==ret + assert os.path.getsize(asdf_edit) == os.path.getsize(asdf_base) with asdf.open(asdf_edit) as af: assert af.tree["foo"] == 41 - assert os.path.getsize(asdf_edit) == os.path.getsize(asdf_base) @pytest.mark.parametrize("version", asdf.versioning.supported_versions) def test_edit_larger(tmpdir, version): asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test(tmpdir, version) - _create_edited_yaml(yaml_base, yaml_edit, "foo: 42", "foo: 41\nbar: 13") + _create_edited_yaml(yaml_base, yaml_edit, "foo: 42", "foo: 42\nbar: 13") args = ["edit", "-s", "-f", f"{yaml_edit}", "-o", f"{asdf_edit}"] ret = main.main_from_args(args) assert 0==ret + assert os.path.getsize(asdf_edit) - os.path.getsize(asdf_base) > 10000 with asdf.open(asdf_edit) as af: assert "bar" in af.tree - assert os.path.getsize(asdf_edit) - os.path.getsize(asdf_base) > 10000 From 9d2a66323c11b3f23542d14c9ffefb9559f9cc72 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Wed, 23 Sep 2020 08:23:28 -0400 Subject: [PATCH 30/47] Adding formatting changes and checks using feed back from code review. --- asdf/commands/tests/test_edit.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/asdf/commands/tests/test_edit.py b/asdf/commands/tests/test_edit.py index 902f60a30..ecc6ea2c9 100644 --- a/asdf/commands/tests/test_edit.py +++ b/asdf/commands/tests/test_edit.py @@ -6,9 +6,7 @@ import pytest import asdf -from asdf import AsdfFile from asdf.commands import main -from ...tests.helpers import get_file_sizes, assert_tree_match def _create_base_asdf(version, oname): @@ -43,7 +41,7 @@ def _initialize_test(tmpdir, version): asdf_edit = os.path.join(tmpdir, "edit.asdf") yaml_edit = os.path.join(tmpdir, "edit.yaml") - _create_base_asdf(version,asdf_base) + _create_base_asdf(version, asdf_base) shutil.copyfile(asdf_base, asdf_edit) args = ["edit", "-e", "-f", f"{asdf_base}", "-o", f"{yaml_base}"] @@ -60,12 +58,12 @@ def test_edit_smaller(tmpdir, version): args = ["edit", "-s", "-f", f"{yaml_edit}", "-o", f"{asdf_edit}"] ret = main.main_from_args(args) - assert 0==ret assert os.path.getsize(asdf_edit) == os.path.getsize(asdf_base) with asdf.open(asdf_edit) as af: assert af.tree["foo"] == 2 + @pytest.mark.parametrize("version", asdf.versioning.supported_versions) def test_edit_equal(tmpdir, version): asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test(tmpdir, version) @@ -74,12 +72,12 @@ def test_edit_equal(tmpdir, version): args = ["edit", "-s", "-f", f"{yaml_edit}", "-o", f"{asdf_edit}"] ret = main.main_from_args(args) - assert 0==ret assert os.path.getsize(asdf_edit) == os.path.getsize(asdf_base) with asdf.open(asdf_edit) as af: assert af.tree["foo"] == 41 + @pytest.mark.parametrize("version", asdf.versioning.supported_versions) def test_edit_larger(tmpdir, version): asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test(tmpdir, version) @@ -88,7 +86,6 @@ def test_edit_larger(tmpdir, version): args = ["edit", "-s", "-f", f"{yaml_edit}", "-o", f"{asdf_edit}"] ret = main.main_from_args(args) - assert 0==ret assert os.path.getsize(asdf_edit) - os.path.getsize(asdf_base) > 10000 with asdf.open(asdf_edit) as af: From 6639916bef267446f1e45217859ff14d9c47fcd1 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Wed, 23 Sep 2020 08:40:48 -0400 Subject: [PATCH 31/47] Formatting changes based on black and tox. --- asdf/commands/__init__.py | 2 +- asdf/commands/edit.py | 164 +++++++++++++++++++++----------------- 2 files changed, 91 insertions(+), 75 deletions(-) diff --git a/asdf/commands/__init__.py b/asdf/commands/__init__.py index 3a45d8412..3a0afb322 100644 --- a/asdf/commands/__init__.py +++ b/asdf/commands/__init__.py @@ -10,7 +10,7 @@ from .edit import edit -__all__ = [ 'implode', 'explode', 'to_yaml', 'defragment', 'diff', 'list_tags', +__all__ = ['implode', 'explode', 'to_yaml', 'defragment', 'diff', 'list_tags', 'find_extensions', 'info', 'edit'] diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index 17d5b9ddd..a7fcccde6 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -16,7 +16,8 @@ from .main import Command -__all__ = ['edit'] +__all__ = ["edit"] + class Edit(Command): @classmethod @@ -24,48 +25,58 @@ def setup_arguments(cls, subparsers): """ Set up a command line argument parser for the edit subcommand. """ - desc_string = "Allows for easy editing of the YAML in an ASDF file. " \ - "For edit mode, the YAML portion of an ASDF file is" \ - "separated from the ASDF into a text file for easy" \ - "editing. For save mode, the edited text file is written" \ - "to its ASDF file." + desc_string = ( + "Allows for easy editing of the YAML in an ASDF file. " + "For edit mode, the YAML portion of an ASDF file is" + "separated from the ASDF into a text file for easy" + "editing. For save mode, the edited text file is written" + "to its ASDF file." + ) # Set up the parser parser = subparsers.add_parser( str("edit"), help="Edit YAML portion of an ASDF file.", - description=desc_string) + description=desc_string, + ) # Need an input file parser.add_argument( - '--infile', '-f', + "--infile", + "-f", type=str, required=True, - dest='fname', - help="Input file (ASDF for -e option, YAML for -s option") + dest="fname", + help="Input file (ASDF for -e option, YAML for -s option", + ) # Need an output file parser.add_argument( - '--outfile', '-o', + "--outfile", + "-o", type=str, required=True, - dest='oname', - help="Output file (YAML for -e option, ASDF for -s option") + dest="oname", + help="Output file (YAML for -e option, ASDF for -s option", + ) # The edit is either being performed or saved group = parser.add_mutually_exclusive_group(required=True) group.add_argument( - '-s', - action='store_true', - dest='save', - help="Saves a YAML text file to its ASDF file. Requires an YAML input file and ASDF output file.") + "-s", + action="store_true", + dest="save", + help="Saves a YAML text file to an ASDF file. Requires a " + "YAML input file and ASDF output file.", + ) group.add_argument( - '-e', - action='store_true', - dest='edit', - help="Create a YAML text file for a ASDF file. Requires a ASDF input file.") + "-e", + action="store_true", + dest="edit", + help="Create a YAML text file for a ASDF file. Requires a ASDF input file.", + ) parser.set_defaults(func=cls.run) @@ -89,7 +100,7 @@ def is_yaml_file(fname): """ base, ext = os.path.splitext(fname) - if '.yaml' != ext: + if ".yaml" != ext: return False return True @@ -127,7 +138,7 @@ def is_valid_asdf_path(fname): ---------- fname : The character string of the input file name. """ - ext = ['.asdf'] + ext = [".asdf"] if is_valid_path_and_ext(fname, ext): return True print(f"Error: '{fname}' should have extension '{ext[0]}'") @@ -142,7 +153,7 @@ def is_valid_yaml_path(fname): ---------- fname : The character string of the input file name. """ - ext = ['.yaml'] + ext = [".yaml"] if is_valid_path_and_ext(fname, ext): return True print(f"Error: '{fname}' should have extension '{ext[0]}'") @@ -163,16 +174,18 @@ def check_asdf_header(fd): The ASDF header line and the ASDF comment as bytes. """ - header_line = fd.read_until(b'\r?\n', 2, "newline", include=True) + header_line = fd.read_until(b"\r?\n", 2, "newline", include=True) if not header_line.startswith(constants.ASDF_MAGIC): print("Invalid ASDF ID") sys.exit(1) - comment_section = fd.read_until( b'(%YAML)|(' + constants.BLOCK_MAGIC + b')', - 5, - "start of content", - include=False, - exception=False) + comment_section = fd.read_until( + b"(%YAML)|(" + constants.BLOCK_MAGIC + b")", + 5, + "start of content", + include=False, + exception=False, + ) return header_line + comment_section @@ -196,7 +209,7 @@ def open_and_check_asdf_header(fname): # Read the ASDF header and optional comments section header_and_comment = check_asdf_header(fd) - return fd, header_and_comment # Return GenericFile and ASDF header bytes. + return fd, header_and_comment # Return GenericFile and ASDF header bytes. def read_and_validate_yaml(fd, fname): @@ -212,18 +225,20 @@ def read_and_validate_yaml(fd, fname): ------ The YAML portion of an ASDF file as bytes. """ - YAML_TOKEN = b'%YAML' + YAML_TOKEN = b"%YAML" token = fd.read(len(YAML_TOKEN)) if token != YAML_TOKEN: print(f"Error: No YAML in '{fname}'") sys.exit(0) # Get YAML reader and content - reader = fd.reader_until(constants.YAML_END_MARKER_REGEX, - 7, - 'End of YAML marker', - include=True, - initial_content=token) + reader = fd.reader_until( + constants.YAML_END_MARKER_REGEX, + 7, + "End of YAML marker", + include=True, + initial_content=token, + ) yaml_content = reader.read() # Create a YAML tree to validate @@ -233,10 +248,11 @@ def read_and_validate_yaml(fd, fname): print("Error: 'yamlutil.load_tree' failed to return a tree.") sys.exist(1) - schema.validate(tree, None) # Failure raises an exception. + schema.validate(tree, None) # Failure raises an exception. return yaml_content + def edit_func(fname, oname): """ Creates a YAML file from an ASDF file. The YAML file will contain only the @@ -259,6 +275,7 @@ def edit_func(fname, oname): # Open a YAML file for the ASDF YAML. if not is_yaml_file(oname): + print("A YAML file is expected, with '.yaml' extension.") sys.exit(1) # Write the YAML for the original ASDF file. @@ -267,7 +284,7 @@ def edit_func(fname, oname): ofd.write(yaml_text) # Output message to user. - delim = '*' * 70 + delim = "*" * 70 print(f"\n{delim}") print("ASDF formatting and YAML schema validated.") print(f"The text portion of '{fname}' is written to:") @@ -275,15 +292,12 @@ def edit_func(fname, oname): print(f"The file '{oname}' can be edited using your favorite text editor.") print("The edited text can then be saved to the ASDF file of your choice") print("using 'asdftool edit -s -f -o .") - print('-' * 70) - print("Note: This is meant to be a lightweight text editing tool of") - print(" ASDF .If the edited text is larger than the YAML portion") - print(" of the ASDF file to be written to, the edits may not be") - print(" able to saved.") print(f"{delim}\n") return + +# TODO conosidate the next two functions and change 'buffer' to 'pad'. def buffer_edited_text(edited_text, orig_text): """ There is more text in the original ASDF file than in the edited text, @@ -299,15 +313,12 @@ def buffer_edited_text(edited_text, orig_text): The buffered text and the number of spaces added as buffer. """ diff = len(orig_text) - len(edited_text) - if diff < 1: - print("Error: shouldn't be here.") - sys.exit(1) - wdelim = b'\r\n...\r\n' - ldelim = b'\n...\n' - if edited_text[-len(wdelim) :]==wdelim: + wdelim = b"\r\n...\r\n" + ldelim = b"\n...\n" + if edited_text[-len(wdelim) :] == wdelim: delim = wdelim - elif edited_text[-len(ldelim) :]==ldelim: + elif edited_text[-len(ldelim) :] == ldelim: delim = ldelim else: print("Unrecognized YAML delimiter ending the YAML text.") @@ -315,8 +326,7 @@ def buffer_edited_text(edited_text, orig_text): print(f"last {len(wdelim)} bytes are {edited_text[-len(wdelim):]}.") sys.exit(1) - # May not be correct. If on Windows use '\r\n'. - buffered_text = edited_text[: -len(delim)] + b'\n' + b' '*(diff - 1) + delim + buffered_text = edited_text[: -len(delim)] + b"\n" + b"\0" * (diff - 1) + delim return buffered_text, diff - 1 @@ -333,11 +343,11 @@ def add_buffer_to_new_text(edited_text, buffer_size): ------ Buffered text with the number of spaces requested as buffer. """ - wdelim = b'\r\n...\r\n' - ldelim = b'\n...\n' - if edited_text[-len(wdelim) :]==wdelim: + wdelim = b"\r\n...\r\n" + ldelim = b"\n...\n" + if edited_text[-len(wdelim) :] == wdelim: delim = wdelim - elif edited_text[-len(ldelim) :]==ldelim: + elif edited_text[-len(ldelim) :] == ldelim: delim = ldelim else: print("Unrecognized YAML delimiter ending the YAML text.") @@ -345,8 +355,8 @@ def add_buffer_to_new_text(edited_text, buffer_size): print(f"last {len(wdelim)} bytes are {edited_text[-len(wdelim):]}.") sys.exit(1) - buf = b' ' * buffer_size - buffered_text = edited_text[: -len(delim)] + b'\n' + buf + delim + buf = b" " * buffer_size + buffered_text = edited_text[: -len(delim)] + b"\n" + buf + delim return buffered_text @@ -363,15 +373,17 @@ def write_block_index(fd, index): if len(index) < 1: return + # TODO - this needs to be changed to use constants.py and pyyaml bindex_hdr = b"#ASDF BLOCK INDEX\n%YAML 1.1\n---\n" fd.write(bindex_hdr) for idx in index: - ostr = f'- {idx}\n' - fd.write(ostr.encode('utf-8')) - end = b'...' + ostr = f"- {idx}\n" + fd.write(ostr.encode("utf-8")) + end = b"..." fd.write(end) return + def get_next_block_header(fd): """ From a file, gets the next block header. @@ -403,6 +415,7 @@ def get_next_block_header(fd): header = fd.read(hsz) return blk_header + header + def rewrite_asdf_file(edited_text, orig_text, oname, fname): """ Rewrite an ASDF file for too large edited YAML. The edited YAML, a buffer, @@ -419,19 +432,19 @@ def rewrite_asdf_file(edited_text, orig_text, oname, fname): fname : the edit YAML to write to new file. """ - tmp_oname = oname + '.tmp' # Save as a temp file, in case anything goes wrong. + tmp_oname = oname + ".tmp" # Save as a temp file, in case anything goes wrong. buffer_size = 10 * 1000 buffered_text = add_buffer_to_new_text(edited_text, buffer_size) - ifd = open(oname, "r+b") # Open old ASDF to get binary blocks + ifd = open(oname, "r+b") # Open old ASDF to get binary blocks ifd.seek(len(orig_text)) - ofd = open(tmp_oname, "w+b") # Open temp file to write - ofd.write(buffered_text) # Write edited YAML + ofd = open(tmp_oname, "w+b") # Open temp file to write + ofd.write(buffered_text) # Write edited YAML current_location = len(buffered_text) block_index = [] - alloc_loc = 14 # 4 bytes of block ID, 2 blocks of size, 8 blocks into header + alloc_loc = 14 # 4 bytes of block ID, 2 blocks of size, 8 blocks into header block_chunk = 2048 while True: next_block = get_next_block_header(ifd) @@ -439,7 +452,7 @@ def rewrite_asdf_file(edited_text, orig_text, oname, fname): break # Get block size on disk - alloc = struct.unpack(">Q", next_block[alloc_loc:alloc_loc+8])[0] + alloc = struct.unpack(">Q", next_block[alloc_loc : alloc_loc + 8])[0] # Save block location for block index block_index.append(current_location) @@ -451,26 +464,26 @@ def rewrite_asdf_file(edited_text, orig_text, oname, fname): chunk = ifd.read(block_chunk) ofd.write(chunk) alloc -= block_chunk - if alloc>0: + if alloc > 0: chunk = ifd.read(alloc) ofd.write(chunk) - write_block_index(ofd, block_index) # Rename temp file. os.rename(tmp_oname, oname) # Output message to user. - delim = '*' * 70 + delim = "*" * 70 print(f"\n{delim}") print(f"The text in '{fname}' was too large to simply overwrite the") print(f"text in '{oname}'. The file '{oname}' was rewritten to") - print(f"accommodate the larger text size.") + print("accommodate the larger text size.") print(f"Also, added a '\\n' and {buffer_size:,} spaces as a buffer for") print(f"the text in '{oname}' to allow for future edits.") print(f"{delim}\n") + def save_func(fname, oname): """ Checks to makes sure a corresponding ASDF file exists. This is done by @@ -508,7 +521,7 @@ def save_func(fname, oname): # Compare text sizes and maybe output. # There are three cases: - msg_delim = '*' * 70 + msg_delim = "*" * 70 if len(edited_text) == len(asdf_text): with open(oname, "r+b") as fd: fd.write(edited_text) @@ -521,13 +534,16 @@ def save_func(fname, oname): fd.write(buffered_text) print(f"\n{msg_delim}") print(f"The edited text in '{fname}' was written to '{oname}'") - print(f"Added a '\\n' and {diff} buffer of spaces between the YAML text and binary blocks.") + print( + f"Added a '\\n' and {diff} buffer of spaces between the YAML text and binary blocks." + ) print(f"{msg_delim}\n") else: rewrite_asdf_file(edited_text, asdf_text, oname, fname) return + def edit(args): """ Implode a given ASDF file, which may reference external data, back From 7d77ce5bbb53d64a9a40178f0b40d7dcfe9eab7b Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Wed, 23 Sep 2020 09:33:51 -0400 Subject: [PATCH 32/47] Adding changes based on pull request feedback. --- asdf/commands/edit.py | 133 +++++++++++++++++++++++++----------------- 1 file changed, 79 insertions(+), 54 deletions(-) diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index a7fcccde6..4be9d08f6 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -68,7 +68,7 @@ def setup_arguments(cls, subparsers): action="store_true", dest="save", help="Saves a YAML text file to an ASDF file. Requires a " - "YAML input file and ASDF output file.", + "YAML input file and ASDF output file.", ) group.add_argument( @@ -96,7 +96,12 @@ def is_yaml_file(fname): Parameters ---------- - fname : The character string of the input file name. + fname : str + Input file name. + + Return + ------ + bool """ base, ext = os.path.splitext(fname) @@ -111,8 +116,13 @@ def is_valid_path_and_ext(fname, wanted_ext=None): Parameters ---------- - fname : The character string of the input file name. + fname : str + Input file name. wanted_ext : List of extensions to check. + + Return + ------ + bool """ if not os.path.exists(fname): print(f"Error: No file '{fname}' exists.") @@ -136,7 +146,12 @@ def is_valid_asdf_path(fname): Parameters ---------- - fname : The character string of the input file name. + fname : str + ASDF file name + + Return + ------ + bool """ ext = [".asdf"] if is_valid_path_and_ext(fname, ext): @@ -151,7 +166,12 @@ def is_valid_yaml_path(fname): Parameters ---------- - fname : The character string of the input file name. + fname : str + ASDF file name + + Return + ------ + bool """ ext = [".yaml"] if is_valid_path_and_ext(fname, ext): @@ -197,7 +217,8 @@ def open_and_check_asdf_header(fname): Parameters ---------- - fname : The character string of the input file name. + fname : str + Input file name Return ------ @@ -218,7 +239,8 @@ def read_and_validate_yaml(fd, fname): Parameters ---------- - fname : The character string of the input file name. + fname : str + Input file name fd : GenericFile for fname. Return @@ -261,8 +283,10 @@ def edit_func(fname, oname): Parameters ---------- - fname : The character string of the input ASDF file name. - oname : The character string of the output YAML file name. + fname : str + Input ASDF file name + oname : str + Output YAML file name. """ if not is_valid_asdf_path(fname): return False @@ -297,11 +321,31 @@ def edit_func(fname, oname): return -# TODO conosidate the next two functions and change 'buffer' to 'pad'. -def buffer_edited_text(edited_text, orig_text): +def get_yaml_text_and_delimiter(yaml_text): + """ + Splits the YAML text into the text and the end delimiter in preparation + for padding. + """ + + wdelim = b"\r\n...\r\n" + ldelim = b"\n...\n" + if yaml_text[-len(wdelim) :] == wdelim: + delim = wdelim + elif yaml_text[-len(ldelim) :] == ldelim: + delim = ldelim + else: + print("Unrecognized YAML delimiter ending the YAML text.") + print(f"It should be {wdelim} or {ldelim}, but the") + print(f"last {len(wdelim)} bytes are {yaml_text[-len(wdelim):]}.") + sys.exit(1) + + return yaml_text[: -len(delim)], delim + + +def pad_edited_text(edited_text, orig_text): """ There is more text in the original ASDF file than in the edited text, - so we will buffer the edited text with spaces. + so we will pad the edited text with spaces. Parameters ---------- @@ -310,55 +354,36 @@ def buffer_edited_text(edited_text, orig_text): Return ------ - The buffered text and the number of spaces added as buffer. + The padded text and the number of spaces added as pad. """ diff = len(orig_text) - len(edited_text) - wdelim = b"\r\n...\r\n" - ldelim = b"\n...\n" - if edited_text[-len(wdelim) :] == wdelim: - delim = wdelim - elif edited_text[-len(ldelim) :] == ldelim: - delim = ldelim - else: - print("Unrecognized YAML delimiter ending the YAML text.") - print(f"It should be {wdelim} or {ldelim}, but the") - print(f"last {len(wdelim)} bytes are {edited_text[-len(wdelim):]}.") - sys.exit(1) + edited_text, delim = get_yaml_text_and_delimiter(edited_text) - buffered_text = edited_text[: -len(delim)] + b"\n" + b"\0" * (diff - 1) + delim - return buffered_text, diff - 1 + padded_text = edited_text + b"\n" + b" " * (diff - 1) + delim + return padded_text, diff - 1 -def add_buffer_to_new_text(edited_text, buffer_size): +def add_pad_to_new_text(edited_text, pad_size): """ - Adds buffer to edited text. + Adds pad to edited text. Parameters ---------- edited_text - The text from the edited YAML file. - buffer_size - The number of spaces to add as a buffer. + pad_size - The number of spaces to add as a pad. Return ------ - Buffered text with the number of spaces requested as buffer. + Pad text with the number of spaces requested as pad. """ - wdelim = b"\r\n...\r\n" - ldelim = b"\n...\n" - if edited_text[-len(wdelim) :] == wdelim: - delim = wdelim - elif edited_text[-len(ldelim) :] == ldelim: - delim = ldelim - else: - print("Unrecognized YAML delimiter ending the YAML text.") - print(f"It should be {wdelim} or {ldelim}, but the") - print(f"last {len(wdelim)} bytes are {edited_text[-len(wdelim):]}.") - sys.exit(1) - buf = b" " * buffer_size - buffered_text = edited_text[: -len(delim)] + b"\n" + buf + delim + edited_text, delim = get_yaml_text_and_delimiter(edited_text) + + pad = b" " * pad_size + padded_text = edited_text + b"\n" + pad + delim - return buffered_text + return padded_text def write_block_index(fd, index): @@ -418,10 +443,10 @@ def get_next_block_header(fd): def rewrite_asdf_file(edited_text, orig_text, oname, fname): """ - Rewrite an ASDF file for too large edited YAML. The edited YAML, a buffer, + Rewrite an ASDF file for too large edited YAML. The edited YAML, a pad, the blocks will be rewritten. A block index will also be rewritten. If a block index existed in the old file, it will have to be recomputed to - because of the larger YAML size and buffer, which changes the location of + because of the larger YAML size and pad, which changes the location of the binary blocks. Parameters @@ -433,16 +458,16 @@ def rewrite_asdf_file(edited_text, orig_text, oname, fname): """ tmp_oname = oname + ".tmp" # Save as a temp file, in case anything goes wrong. - buffer_size = 10 * 1000 - buffered_text = add_buffer_to_new_text(edited_text, buffer_size) + pad_size = 10 * 1000 + padded_text = add_pad_to_new_text(edited_text, pad_size) ifd = open(oname, "r+b") # Open old ASDF to get binary blocks ifd.seek(len(orig_text)) ofd = open(tmp_oname, "w+b") # Open temp file to write - ofd.write(buffered_text) # Write edited YAML + ofd.write(padded_text) # Write edited YAML - current_location = len(buffered_text) + current_location = len(padded_text) block_index = [] alloc_loc = 14 # 4 bytes of block ID, 2 blocks of size, 8 blocks into header block_chunk = 2048 @@ -479,7 +504,7 @@ def rewrite_asdf_file(edited_text, orig_text, oname, fname): print(f"The text in '{fname}' was too large to simply overwrite the") print(f"text in '{oname}'. The file '{oname}' was rewritten to") print("accommodate the larger text size.") - print(f"Also, added a '\\n' and {buffer_size:,} spaces as a buffer for") + print(f"Also, added a '\\n' and {pad_size:,} '\\0' as a pad for") print(f"the text in '{oname}' to allow for future edits.") print(f"{delim}\n") @@ -529,13 +554,13 @@ def save_func(fname, oname): print(f"The edited text in '{fname}' was written to '{oname}'") print(f"{msg_delim}\n") elif len(edited_text) < len(asdf_text): - buffered_text, diff = buffer_edited_text(edited_text, asdf_text) + padded_text, diff = pad_edited_text(edited_text, asdf_text) with open(oname, "r+b") as fd: - fd.write(buffered_text) + fd.write(padded_text) print(f"\n{msg_delim}") print(f"The edited text in '{fname}' was written to '{oname}'") print( - f"Added a '\\n' and {diff} buffer of spaces between the YAML text and binary blocks." + f"Added a '\\n' and {diff} pad of '\\0' between the YAML text and binary blocks." ) print(f"{msg_delim}\n") else: From d61258f8928b2b32c232881e2ef8e638491ee962 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Wed, 23 Sep 2020 11:37:08 -0400 Subject: [PATCH 33/47] Adding file closures that may have been causing problems during testing, as well as modified file name usage, so each test had its own set of unique file names that may have been causing problems during testings. --- asdf/commands/edit.py | 3 +++ asdf/commands/tests/test_edit.py | 22 +++++++++++----------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index 4be9d08f6..6d4e4349a 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -296,6 +296,7 @@ def edit_func(fname, oname): # Read and validate the YAML of an ASDF file. yaml_text = read_and_validate_yaml(fd, fname) + fd.close() # Open a YAML file for the ASDF YAML. if not is_yaml_file(oname): @@ -492,8 +493,10 @@ def rewrite_asdf_file(edited_text, orig_text, oname, fname): if alloc > 0: chunk = ifd.read(alloc) ofd.write(chunk) + ifd.close() write_block_index(ofd, block_index) + ofd.close() # Rename temp file. os.rename(tmp_oname, oname) diff --git a/asdf/commands/tests/test_edit.py b/asdf/commands/tests/test_edit.py index ecc6ea2c9..a85545f2a 100644 --- a/asdf/commands/tests/test_edit.py +++ b/asdf/commands/tests/test_edit.py @@ -35,11 +35,11 @@ def _create_edited_yaml(base_yaml, edited_yaml, pattern, replacement): fd.write(new_content) -def _initialize_test(tmpdir, version): - asdf_base = os.path.join(tmpdir, "base.asdf") - yaml_base = os.path.join(tmpdir, "base.yaml") - asdf_edit = os.path.join(tmpdir, "edit.asdf") - yaml_edit = os.path.join(tmpdir, "edit.yaml") +def _initialize_test(tmpdir, version, test_name): + asdf_base = os.path.join(tmpdir, f"{test_name}_base.asdf") + yaml_base = os.path.join(tmpdir, f"{test_name}_base.yaml") + asdf_edit = os.path.join(tmpdir, f"{test_name}_edit.asdf") + yaml_edit = os.path.join(tmpdir, f"{test_name}_edit.yaml") _create_base_asdf(version, asdf_base) shutil.copyfile(asdf_base, asdf_edit) @@ -52,12 +52,12 @@ def _initialize_test(tmpdir, version): @pytest.mark.parametrize("version", asdf.versioning.supported_versions) def test_edit_smaller(tmpdir, version): - asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test(tmpdir, version) + asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test(tmpdir, version, "smaller") _create_edited_yaml(yaml_base, yaml_edit, "foo: 42", "foo: 2") args = ["edit", "-s", "-f", f"{yaml_edit}", "-o", f"{asdf_edit}"] - ret = main.main_from_args(args) + main.main_from_args(args) assert os.path.getsize(asdf_edit) == os.path.getsize(asdf_base) with asdf.open(asdf_edit) as af: @@ -66,12 +66,12 @@ def test_edit_smaller(tmpdir, version): @pytest.mark.parametrize("version", asdf.versioning.supported_versions) def test_edit_equal(tmpdir, version): - asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test(tmpdir, version) + asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test(tmpdir, version, "equal") _create_edited_yaml(yaml_base, yaml_edit, "foo: 42", "foo: 41") args = ["edit", "-s", "-f", f"{yaml_edit}", "-o", f"{asdf_edit}"] - ret = main.main_from_args(args) + main.main_from_args(args) assert os.path.getsize(asdf_edit) == os.path.getsize(asdf_base) with asdf.open(asdf_edit) as af: @@ -80,12 +80,12 @@ def test_edit_equal(tmpdir, version): @pytest.mark.parametrize("version", asdf.versioning.supported_versions) def test_edit_larger(tmpdir, version): - asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test(tmpdir, version) + asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test(tmpdir, version, "larger") _create_edited_yaml(yaml_base, yaml_edit, "foo: 42", "foo: 42\nbar: 13") args = ["edit", "-s", "-f", f"{yaml_edit}", "-o", f"{asdf_edit}"] - ret = main.main_from_args(args) + main.main_from_args(args) assert os.path.getsize(asdf_edit) - os.path.getsize(asdf_base) > 10000 with asdf.open(asdf_edit) as af: From a74f839193ceedea44e5abc8a0992bfe4778cb9b Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Wed, 23 Sep 2020 11:39:36 -0400 Subject: [PATCH 34/47] Updating user messages to properly reflect changes to how edit works. --- asdf/commands/edit.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index 6d4e4349a..1ce0be6df 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -507,7 +507,7 @@ def rewrite_asdf_file(edited_text, orig_text, oname, fname): print(f"The text in '{fname}' was too large to simply overwrite the") print(f"text in '{oname}'. The file '{oname}' was rewritten to") print("accommodate the larger text size.") - print(f"Also, added a '\\n' and {pad_size:,} '\\0' as a pad for") + print(f"Also, added a '\\n' and {pad_size:,} ' ' as a pad for") print(f"the text in '{oname}' to allow for future edits.") print(f"{delim}\n") @@ -563,7 +563,7 @@ def save_func(fname, oname): print(f"\n{msg_delim}") print(f"The edited text in '{fname}' was written to '{oname}'") print( - f"Added a '\\n' and {diff} pad of '\\0' between the YAML text and binary blocks." + f"Added a '\\n' and {diff} pad of ' ' between the YAML text and binary blocks." ) print(f"{msg_delim}\n") else: From b95f2a7e03221d430621c0b4e53d375d48ee67e2 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Wed, 23 Sep 2020 12:42:42 -0400 Subject: [PATCH 35/47] The Windows tests are still failing, so I added the 'with' idiom when creating the base ASDF file. --- asdf/commands/tests/test_edit.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/asdf/commands/tests/test_edit.py b/asdf/commands/tests/test_edit.py index a85545f2a..02e9c562e 100644 --- a/asdf/commands/tests/test_edit.py +++ b/asdf/commands/tests/test_edit.py @@ -23,8 +23,8 @@ def _create_base_asdf(version, oname): "sequence": seq, } - af = asdf.AsdfFile(tree, version=version) - af.write_to(oname) + with asdf.AsdfFile(tree, version=version) as af: + af.write_to(oname) def _create_edited_yaml(base_yaml, edited_yaml, pattern, replacement): @@ -52,7 +52,9 @@ def _initialize_test(tmpdir, version, test_name): @pytest.mark.parametrize("version", asdf.versioning.supported_versions) def test_edit_smaller(tmpdir, version): - asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test(tmpdir, version, "smaller") + asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test( + tmpdir, version, "smaller" + ) _create_edited_yaml(yaml_base, yaml_edit, "foo: 42", "foo: 2") @@ -66,7 +68,9 @@ def test_edit_smaller(tmpdir, version): @pytest.mark.parametrize("version", asdf.versioning.supported_versions) def test_edit_equal(tmpdir, version): - asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test(tmpdir, version, "equal") + asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test( + tmpdir, version, "equal" + ) _create_edited_yaml(yaml_base, yaml_edit, "foo: 42", "foo: 41") @@ -80,7 +84,9 @@ def test_edit_equal(tmpdir, version): @pytest.mark.parametrize("version", asdf.versioning.supported_versions) def test_edit_larger(tmpdir, version): - asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test(tmpdir, version, "larger") + asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test( + tmpdir, version, "larger" + ) _create_edited_yaml(yaml_base, yaml_edit, "foo: 42", "foo: 42\nbar: 13") From f38b186758253a18c99fa1df7eb2f2d99d1a2b94 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Wed, 23 Sep 2020 12:58:44 -0400 Subject: [PATCH 36/47] The method 'os.rename' always raises an error on Windows. Switching to using 'os.replace'. --- asdf/commands/edit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index 1ce0be6df..344e4c315 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -499,7 +499,7 @@ def rewrite_asdf_file(edited_text, orig_text, oname, fname): ofd.close() # Rename temp file. - os.rename(tmp_oname, oname) + os.replace(tmp_oname, oname) # Output message to user. delim = "*" * 70 From 4f9bf9d6841cdb66fde7026ab6078b6d56500e12 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Wed, 23 Sep 2020 13:52:09 -0400 Subject: [PATCH 37/47] There are still Travis-CI failures on Windows. Changing the way to create the ASDF file to be edited by using asdf.write_to instead of simply copying the base ASDF using shutil. --- asdf/commands/tests/test_edit.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/asdf/commands/tests/test_edit.py b/asdf/commands/tests/test_edit.py index 02e9c562e..2b7ee6c12 100644 --- a/asdf/commands/tests/test_edit.py +++ b/asdf/commands/tests/test_edit.py @@ -1,6 +1,5 @@ import os import re -import shutil import numpy as np import pytest @@ -42,7 +41,7 @@ def _initialize_test(tmpdir, version, test_name): yaml_edit = os.path.join(tmpdir, f"{test_name}_edit.yaml") _create_base_asdf(version, asdf_base) - shutil.copyfile(asdf_base, asdf_edit) + _create_base_asdf(version, asdf_edit) args = ["edit", "-e", "-f", f"{asdf_base}", "-o", f"{yaml_base}"] main.main_from_args(args) From acb658e82f5d20113930a9a0ac69fae0bc55627b Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Thu, 24 Sep 2020 11:36:29 -0400 Subject: [PATCH 38/47] Adding binary read and writes to the test to hopefully fix the test on Windows. --- asdf/commands/tests/test_edit.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/asdf/commands/tests/test_edit.py b/asdf/commands/tests/test_edit.py index 2b7ee6c12..b7d9d6162 100644 --- a/asdf/commands/tests/test_edit.py +++ b/asdf/commands/tests/test_edit.py @@ -27,10 +27,10 @@ def _create_base_asdf(version, oname): def _create_edited_yaml(base_yaml, edited_yaml, pattern, replacement): - with open(base_yaml) as fd: + with open(base_yaml,"rb") as fd: content = fd.read() new_content = re.sub(pattern, replacement, content) - with open(edited_yaml, "w") as fd: + with open(edited_yaml, "wb") as fd: fd.write(new_content) @@ -55,7 +55,7 @@ def test_edit_smaller(tmpdir, version): tmpdir, version, "smaller" ) - _create_edited_yaml(yaml_base, yaml_edit, "foo: 42", "foo: 2") + _create_edited_yaml(yaml_base, yaml_edit, b"foo: 42", b"foo: 2") args = ["edit", "-s", "-f", f"{yaml_edit}", "-o", f"{asdf_edit}"] main.main_from_args(args) @@ -71,7 +71,7 @@ def test_edit_equal(tmpdir, version): tmpdir, version, "equal" ) - _create_edited_yaml(yaml_base, yaml_edit, "foo: 42", "foo: 41") + _create_edited_yaml(yaml_base, yaml_edit, b"foo: 42", b"foo: 41") args = ["edit", "-s", "-f", f"{yaml_edit}", "-o", f"{asdf_edit}"] main.main_from_args(args) @@ -87,7 +87,7 @@ def test_edit_larger(tmpdir, version): tmpdir, version, "larger" ) - _create_edited_yaml(yaml_base, yaml_edit, "foo: 42", "foo: 42\nbar: 13") + _create_edited_yaml(yaml_base, yaml_edit, b"foo: 42", b"foo: 42\nbar: 13") args = ["edit", "-s", "-f", f"{yaml_edit}", "-o", f"{asdf_edit}"] main.main_from_args(args) From 5fa8171da906f8f905d96b7811ec6fbace2246dd Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Thu, 24 Sep 2020 12:26:03 -0400 Subject: [PATCH 39/47] Adding formatting change to conform with style standards. --- asdf/commands/tests/test_edit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asdf/commands/tests/test_edit.py b/asdf/commands/tests/test_edit.py index b7d9d6162..c4ef20889 100644 --- a/asdf/commands/tests/test_edit.py +++ b/asdf/commands/tests/test_edit.py @@ -27,7 +27,7 @@ def _create_base_asdf(version, oname): def _create_edited_yaml(base_yaml, edited_yaml, pattern, replacement): - with open(base_yaml,"rb") as fd: + with open(base_yaml, "rb") as fd: content = fd.read() new_content = re.sub(pattern, replacement, content) with open(edited_yaml, "wb") as fd: From 0102261e35ff466814de243306bd6430ac0c58ff Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Fri, 2 Oct 2020 12:57:08 -0400 Subject: [PATCH 40/47] Incorporating PR feedback and correctly implementing padding. The cases for edited YAML portions smaller or equal to the existing YAML in an ASDF file are complete. --- asdf/commands/edit.py | 281 +++++++++---------------------- asdf/commands/tests/test_edit.py | 20 ++- 2 files changed, 91 insertions(+), 210 deletions(-) diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index 344e4c315..5d03a4b22 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -35,7 +35,7 @@ def setup_arguments(cls, subparsers): # Set up the parser parser = subparsers.add_parser( - str("edit"), + "edit", help="Edit YAML portion of an ASDF file.", description=desc_string, ) @@ -60,6 +60,18 @@ def setup_arguments(cls, subparsers): help="Output file (YAML for -e option, ASDF for -s option", ) + """ + # Validate input YAML. Optional, since this could be what's being corrected. + parser.add_argument( + "--validate", + "-v", + type=bool, + action="store_true", + dest="validate", + help="Validate input YAML format.", + ) + """ + # The edit is either being performed or saved group = parser.add_mutually_exclusive_group(required=True) @@ -105,7 +117,7 @@ def is_yaml_file(fname): """ base, ext = os.path.splitext(fname) - if ".yaml" != ext: + if ".yaml" != ext and ".yml" != ext: return False return True @@ -173,7 +185,7 @@ def is_valid_yaml_path(fname): ------ bool """ - ext = [".yaml"] + ext = [".yaml", ".yml"] if is_valid_path_and_ext(fname, ext): return True print(f"Error: '{fname}' should have extension '{ext[0]}'") @@ -233,7 +245,7 @@ def open_and_check_asdf_header(fname): return fd, header_and_comment # Return GenericFile and ASDF header bytes. -def read_and_validate_yaml(fd, fname): +def read_and_validate_yaml(fd, fname, validate_yaml): """ Get the YAML text from an ASDF formatted file. @@ -263,14 +275,15 @@ def read_and_validate_yaml(fd, fname): ) yaml_content = reader.read() - # Create a YAML tree to validate - # The YAML text must be converted to a stream. - tree = yamlutil.load_tree(io.BytesIO(yaml_content)) - if tree is None: - print("Error: 'yamlutil.load_tree' failed to return a tree.") - sys.exist(1) + if validate_yaml: + # Create a YAML tree to validate + # The YAML text must be converted to a stream. + tree = yamlutil.load_tree(io.BytesIO(yaml_content)) + if tree is None: + print("Error: 'yamlutil.load_tree' failed to return a tree.") + sys.exist(1) - schema.validate(tree, None) # Failure raises an exception. + schema.validate(tree, None) # Failure raises an exception. return yaml_content @@ -291,18 +304,17 @@ def edit_func(fname, oname): if not is_valid_asdf_path(fname): return False + if not is_yaml_file(oname): + print("A YAML file is expected, with '.yaml' or '.yml' extension.") + sys.exit(1) + # Validate input file is an ASDF file. fd, asdf_text = open_and_check_asdf_header(fname) # Read and validate the YAML of an ASDF file. - yaml_text = read_and_validate_yaml(fd, fname) + yaml_text = read_and_validate_yaml(fd, fname, False) fd.close() - # Open a YAML file for the ASDF YAML. - if not is_yaml_file(oname): - print("A YAML file is expected, with '.yaml' extension.") - sys.exit(1) - # Write the YAML for the original ASDF file. with open(oname, "wb") as ofd: ofd.write(asdf_text) @@ -322,71 +334,6 @@ def edit_func(fname, oname): return -def get_yaml_text_and_delimiter(yaml_text): - """ - Splits the YAML text into the text and the end delimiter in preparation - for padding. - """ - - wdelim = b"\r\n...\r\n" - ldelim = b"\n...\n" - if yaml_text[-len(wdelim) :] == wdelim: - delim = wdelim - elif yaml_text[-len(ldelim) :] == ldelim: - delim = ldelim - else: - print("Unrecognized YAML delimiter ending the YAML text.") - print(f"It should be {wdelim} or {ldelim}, but the") - print(f"last {len(wdelim)} bytes are {yaml_text[-len(wdelim):]}.") - sys.exit(1) - - return yaml_text[: -len(delim)], delim - - -def pad_edited_text(edited_text, orig_text): - """ - There is more text in the original ASDF file than in the edited text, - so we will pad the edited text with spaces. - - Parameters - ---------- - edited_text - The text from the edited YAML file - orig_text - The text from the original ASDF file - - Return - ------ - The padded text and the number of spaces added as pad. - """ - diff = len(orig_text) - len(edited_text) - - edited_text, delim = get_yaml_text_and_delimiter(edited_text) - - padded_text = edited_text + b"\n" + b" " * (diff - 1) + delim - return padded_text, diff - 1 - - -def add_pad_to_new_text(edited_text, pad_size): - """ - Adds pad to edited text. - - Parameters - ---------- - edited_text - The text from the edited YAML file. - pad_size - The number of spaces to add as a pad. - - Return - ------ - Pad text with the number of spaces requested as pad. - """ - - edited_text, delim = get_yaml_text_and_delimiter(edited_text) - - pad = b" " * pad_size - padded_text = edited_text + b"\n" + pad + delim - - return padded_text - - def write_block_index(fd, index): """ Write the block index to an ASDF file. @@ -410,106 +357,46 @@ def write_block_index(fd, index): return -def get_next_block_header(fd): +def find_first_block(fname): """ - From a file, gets the next block header. + Finds the location of the first binary block in an ASDF file. + Parameters ---------- - fd - The ASDF file to get the next block. + fname : str + Input ASDF file name. Return ------ - If a block is found, return the bytes of the block header. - Otherwise return None. - """ - # Block header structure: - # 4 bytes of magic number - # 2 bytes of header length, after the length field (min 48) - # 4 bytes flag - # 4 bytes compression - # 8 bytes allocated size - # 8 bytes used (on disk) size - # 8 bytes data size - # 16 bytes checksum - blk_header = fd.read(6) - if len(blk_header) != 6: - return None - if not blk_header.startswith(constants.BLOCK_MAGIC): - return None - hsz = struct.unpack(">H", blk_header[4:6])[0] - header = fd.read(hsz) - return blk_header + header - - -def rewrite_asdf_file(edited_text, orig_text, oname, fname): - """ - Rewrite an ASDF file for too large edited YAML. The edited YAML, a pad, - the blocks will be rewritten. A block index will also be rewritten. If a - block index existed in the old file, it will have to be recomputed to - because of the larger YAML size and pad, which changes the location of - the binary blocks. - - Parameters - ---------- - edited_text : the new YAML text to write out. - orig_text : the original YAML text to overwrite. - oname : the ASDF file to overwrite. - fname : the edit YAML to write to new file. - """ - - tmp_oname = oname + ".tmp" # Save as a temp file, in case anything goes wrong. - pad_size = 10 * 1000 - padded_text = add_pad_to_new_text(edited_text, pad_size) - - ifd = open(oname, "r+b") # Open old ASDF to get binary blocks - ifd.seek(len(orig_text)) - - ofd = open(tmp_oname, "w+b") # Open temp file to write - ofd.write(padded_text) # Write edited YAML - - current_location = len(padded_text) - block_index = [] - alloc_loc = 14 # 4 bytes of block ID, 2 blocks of size, 8 blocks into header - block_chunk = 2048 - while True: - next_block = get_next_block_header(ifd) - if next_block is None: - break - - # Get block size on disk - alloc = struct.unpack(">Q", next_block[alloc_loc : alloc_loc + 8])[0] - - # Save block location for block index - block_index.append(current_location) - current_location = current_location + len(next_block) + alloc - - # Copy block - ofd.write(next_block) - while alloc >= block_chunk: - chunk = ifd.read(block_chunk) - ofd.write(chunk) - alloc -= block_chunk - if alloc > 0: - chunk = ifd.read(alloc) - ofd.write(chunk) - ifd.close() + Location, in bytes, of the first binary block. + """ + with generic_io.get_file(fname, mode="r") as fd: + reader = fd.reader_until( + constants.BLOCK_MAGIC, + 7, + "First binary block", + include=False, + ) + content_to_first_block = reader.read() + return len(content_to_first_block) - write_block_index(ofd, block_index) - ofd.close() +def write_edited_yaml_larger (oname, edited_yaml, first_block_loc): + print("Larger") - # Rename temp file. - os.replace(tmp_oname, oname) - # Output message to user. - delim = "*" * 70 - print(f"\n{delim}") - print(f"The text in '{fname}' was too large to simply overwrite the") - print(f"text in '{oname}'. The file '{oname}' was rewritten to") - print("accommodate the larger text size.") - print(f"Also, added a '\\n' and {pad_size:,} ' ' as a pad for") - print(f"the text in '{oname}' to allow for future edits.") - print(f"{delim}\n") +def write_edited_yaml(oname, edited_yaml, first_block_loc): + if len(edited_yaml) < first_block_loc: + pad_length = first_block_loc - len(edited_yaml) + padding = b'\0' * pad_length + with open(oname, "r+b") as fd: + fd.write(edited_yaml) + fd.write(padding) + elif len(edited_yaml) == first_block_loc: + with open(oname, "r+b") as fd: + fd.write(edited_yaml) + else: + write_edited_yaml_larger(oname, edited_yaml, first_block_loc) def save_func(fname, oname): @@ -525,8 +412,10 @@ def save_func(fname, oname): Parameters ---------- - fname : The input YAML file. - oname : The output ASDF file name. + fname : str + Input YAML file name. + oname : str + The output ASDF file name. """ if not is_valid_yaml_path(fname): @@ -537,37 +426,25 @@ def save_func(fname, oname): # Validate input file is an ASDF formatted YAML. ifd, iasdf_text = open_and_check_asdf_header(fname) - iyaml_text = read_and_validate_yaml(ifd, fname) + iyaml_text = read_and_validate_yaml(ifd, fname, True) ifd.close() edited_text = iasdf_text + iyaml_text - # Get text from ASDF file. - ofd, oasdf_text = open_and_check_asdf_header(oname) - oyaml_text = read_and_validate_yaml(ofd, oname) - ofd.close() - asdf_text = oasdf_text + oyaml_text - - # Compare text sizes and maybe output. - # There are three cases: - msg_delim = "*" * 70 - if len(edited_text) == len(asdf_text): - with open(oname, "r+b") as fd: - fd.write(edited_text) - print(f"\n{msg_delim}") - print(f"The edited text in '{fname}' was written to '{oname}'") - print(f"{msg_delim}\n") - elif len(edited_text) < len(asdf_text): - padded_text, diff = pad_edited_text(edited_text, asdf_text) - with open(oname, "r+b") as fd: - fd.write(padded_text) - print(f"\n{msg_delim}") - print(f"The edited text in '{fname}' was written to '{oname}'") - print( - f"Added a '\\n' and {diff} pad of ' ' between the YAML text and binary blocks." - ) - print(f"{msg_delim}\n") - else: - rewrite_asdf_file(edited_text, asdf_text, oname, fname) + # - Find location of first block. + loc = find_first_block(oname) + write_edited_yaml(oname, edited_text, loc) + # - Using edited_text length and first block location determine. + # space availability for new YAML. + # - Smaller: + # - Overwrite YAML in ASDF, then pad 0x00 to first b'\xd3BLK'. + # - Equal: + # - Overwrite YAML in ASDF. No padding. + # - Larger: + # - Create new file. + # - Write new YAML. + # - Add 10,000 0x00 pad bytes between '...\r?\n' and '\xd3BLK'. + # - If no streaming block found, write block index. + # - If streaming block found, handle correctly. return diff --git a/asdf/commands/tests/test_edit.py b/asdf/commands/tests/test_edit.py index c4ef20889..3ce43f2fa 100644 --- a/asdf/commands/tests/test_edit.py +++ b/asdf/commands/tests/test_edit.py @@ -34,11 +34,11 @@ def _create_edited_yaml(base_yaml, edited_yaml, pattern, replacement): fd.write(new_content) -def _initialize_test(tmpdir, version, test_name): - asdf_base = os.path.join(tmpdir, f"{test_name}_base.asdf") - yaml_base = os.path.join(tmpdir, f"{test_name}_base.yaml") - asdf_edit = os.path.join(tmpdir, f"{test_name}_edit.asdf") - yaml_edit = os.path.join(tmpdir, f"{test_name}_edit.yaml") +def _initialize_test(tmpdir, version): + asdf_base = os.path.join(tmpdir, f"base.asdf") + yaml_base = os.path.join(tmpdir, f"base.yaml") + asdf_edit = os.path.join(tmpdir, f"edit.asdf") + yaml_edit = os.path.join(tmpdir, f"edit.yaml") _create_base_asdf(version, asdf_base) _create_base_asdf(version, asdf_edit) @@ -52,7 +52,7 @@ def _initialize_test(tmpdir, version, test_name): @pytest.mark.parametrize("version", asdf.versioning.supported_versions) def test_edit_smaller(tmpdir, version): asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test( - tmpdir, version, "smaller" + tmpdir, version ) _create_edited_yaml(yaml_base, yaml_edit, b"foo: 42", b"foo: 2") @@ -68,7 +68,7 @@ def test_edit_smaller(tmpdir, version): @pytest.mark.parametrize("version", asdf.versioning.supported_versions) def test_edit_equal(tmpdir, version): asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test( - tmpdir, version, "equal" + tmpdir, version ) _create_edited_yaml(yaml_base, yaml_edit, b"foo: 42", b"foo: 41") @@ -81,10 +81,11 @@ def test_edit_equal(tmpdir, version): assert af.tree["foo"] == 41 +""" @pytest.mark.parametrize("version", asdf.versioning.supported_versions) def test_edit_larger(tmpdir, version): asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test( - tmpdir, version, "larger" + tmpdir, version ) _create_edited_yaml(yaml_base, yaml_edit, b"foo: 42", b"foo: 42\nbar: 13") @@ -95,3 +96,6 @@ def test_edit_larger(tmpdir, version): with asdf.open(asdf_edit) as af: assert "bar" in af.tree + +#TODO - Test stream +""" From 4d1cf9cdc089a627aa0f5da66458fb8eee5166f7 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Fri, 2 Oct 2020 14:54:45 -0400 Subject: [PATCH 41/47] Added changes to the edit subcommand based on PR feedback. --- asdf/commands/edit.py | 121 ++++++++++++++++++++++--------- asdf/commands/tests/test_edit.py | 8 +- 2 files changed, 90 insertions(+), 39 deletions(-) diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index 5d03a4b22..bf4290401 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -27,9 +27,9 @@ def setup_arguments(cls, subparsers): """ desc_string = ( "Allows for easy editing of the YAML in an ASDF file. " - "For edit mode, the YAML portion of an ASDF file is" - "separated from the ASDF into a text file for easy" - "editing. For save mode, the edited text file is written" + "For edit mode, the YAML portion of an ASDF file is " + "separated from the ASDF into a text file for easy " + "editing. For save mode, the edited text file is written " "to its ASDF file." ) @@ -47,7 +47,7 @@ def setup_arguments(cls, subparsers): type=str, required=True, dest="fname", - help="Input file (ASDF for -e option, YAML for -s option", + help="Input file (ASDF for -e option, YAML for -s option)", ) # Need an output file @@ -57,7 +57,7 @@ def setup_arguments(cls, subparsers): type=str, required=True, dest="oname", - help="Output file (YAML for -e option, ASDF for -s option", + help="Output file (YAML for -e option, ASDF for -s option)", ) """ @@ -130,7 +130,8 @@ def is_valid_path_and_ext(fname, wanted_ext=None): ---------- fname : str Input file name. - wanted_ext : List of extensions to check. + wanted_ext : List of str, optional + Extensions to check Return ------ @@ -203,7 +204,8 @@ def check_asdf_header(fd): Return ------ - The ASDF header line and the ASDF comment as bytes. + bytes + The ASDF header line and the ASDF comment. """ header_line = fd.read_until(b"\r?\n", 2, "newline", include=True) @@ -234,7 +236,10 @@ def open_and_check_asdf_header(fname): Return ------ - File descriptor for ASDF file and the ASDF header and ASDF comments as bytes. + GenericFile + File descriptor for ASDF file. + bytes + ASDF header and ASDF comments. """ fullpath = os.path.abspath(fname) fd = generic_io.get_file(fullpath, mode="r") @@ -253,11 +258,13 @@ def read_and_validate_yaml(fd, fname, validate_yaml): ---------- fname : str Input file name - fd : GenericFile for fname. + fd : GenericFile + for fname. Return ------ - The YAML portion of an ASDF file as bytes. + bytes + The YAML portion of an ASDF file. """ YAML_TOKEN = b"%YAML" token = fd.read(len(YAML_TOKEN)) @@ -283,7 +290,7 @@ def read_and_validate_yaml(fd, fname, validate_yaml): print("Error: 'yamlutil.load_tree' failed to return a tree.") sys.exist(1) - schema.validate(tree, None) # Failure raises an exception. + schema.validate(tree) # Failure raises an exception. return yaml_content @@ -361,7 +368,6 @@ def find_first_block(fname): """ Finds the location of the first binary block in an ASDF file. - Parameters ---------- fname : str @@ -369,7 +375,8 @@ def find_first_block(fname): Return ------ - Location, in bytes, of the first binary block. + int + Location, in bytes, of the first binary block. """ with generic_io.get_file(fname, mode="r") as fd: reader = fd.reader_until( @@ -381,22 +388,85 @@ def find_first_block(fname): content_to_first_block = reader.read() return len(content_to_first_block) -def write_edited_yaml_larger (oname, edited_yaml, first_block_loc): - print("Larger") +def copy_binary_blocks(ofd, ifd): + """ + Copies the binary blocks from the input ASDF to the output ASDF. + + Parameters + ---------- + ofd: file descriptor + Output ASDF file. + ifd: file descriptor + Input ASDF file. + """ + block_index = [] # A new block index needs to be computed. + + while True: + token_length = ifd.read(6) + if not token_length.startswith(constants.BLOCK_MAGIC): + ifd.seek(-6,os.SEEK_CUR) + break + + +def write_edited_yaml_larger(fname, oname, edited_yaml, first_block_loc): + """ + The edited YAML is too large to simply overwrite the exiting YAML in an + ASDF file, so the ASDF file needs to be rewritten. + + Parameters + ---------- + oname : str + Input ASDF file name. + edited_yaml : byte string + The edited YAML to be saved to an ASDF file + first_block_location : the location in the ASDF file for the first binary + block + """ + tmp_oname = oname + "tmp" + ifd = open(oname,"rb") + ifd.seek(first_block_loc) -def write_edited_yaml(oname, edited_yaml, first_block_loc): + ofd = open(tmp_oname,"wb") + ofd.write(edited_yaml) + + pad_length = 10000 + padding = b'\0' * pad_length + ofd.write(padding) + + copy_binary_blocks(ofd, ifd) + + ifd.close() + ofd.close() + # os.replace(tmp_oname,oname) + + +def write_edited_yaml(fname, oname, edited_yaml, first_block_loc): + """ + Write the edited YAML is to an existing ASDF file. + + Parameters + ---------- + oname : str + Input ASDF file name. + edited_yaml : byte string + The edited YAML to be saved to an ASDF file + first_block_location : the location in the ASDF file for the first binary + block + """ if len(edited_yaml) < first_block_loc: + # The YAML in the ASDF can simply be overwritten pad_length = first_block_loc - len(edited_yaml) padding = b'\0' * pad_length with open(oname, "r+b") as fd: fd.write(edited_yaml) fd.write(padding) elif len(edited_yaml) == first_block_loc: + # The YAML in the ASDF can simply be overwritten with open(oname, "r+b") as fd: fd.write(edited_yaml) else: - write_edited_yaml_larger(oname, edited_yaml, first_block_loc) + write_edited_yaml_larger(fname, oname, edited_yaml, first_block_loc) def save_func(fname, oname): @@ -430,23 +500,8 @@ def save_func(fname, oname): ifd.close() edited_text = iasdf_text + iyaml_text - # - Find location of first block. loc = find_first_block(oname) - write_edited_yaml(oname, edited_text, loc) - # - Using edited_text length and first block location determine. - # space availability for new YAML. - # - Smaller: - # - Overwrite YAML in ASDF, then pad 0x00 to first b'\xd3BLK'. - # - Equal: - # - Overwrite YAML in ASDF. No padding. - # - Larger: - # - Create new file. - # - Write new YAML. - # - Add 10,000 0x00 pad bytes between '...\r?\n' and '\xd3BLK'. - # - If no streaming block found, write block index. - # - If streaming block found, handle correctly. - - return + write_edited_yaml(fname, oname, edited_text, loc) def edit(args): diff --git a/asdf/commands/tests/test_edit.py b/asdf/commands/tests/test_edit.py index 3ce43f2fa..c2e4fb7b6 100644 --- a/asdf/commands/tests/test_edit.py +++ b/asdf/commands/tests/test_edit.py @@ -51,9 +51,7 @@ def _initialize_test(tmpdir, version): @pytest.mark.parametrize("version", asdf.versioning.supported_versions) def test_edit_smaller(tmpdir, version): - asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test( - tmpdir, version - ) + asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test(tmpdir, version) _create_edited_yaml(yaml_base, yaml_edit, b"foo: 42", b"foo: 2") @@ -67,9 +65,7 @@ def test_edit_smaller(tmpdir, version): @pytest.mark.parametrize("version", asdf.versioning.supported_versions) def test_edit_equal(tmpdir, version): - asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test( - tmpdir, version - ) + asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test(tmpdir, version) _create_edited_yaml(yaml_base, yaml_edit, b"foo: 42", b"foo: 41") From b6e8ce6eb0f2f8f0d8a379ca88f7f35cdba009e3 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Mon, 5 Oct 2020 13:09:29 -0400 Subject: [PATCH 42/47] Adding rewrite functionality to the 'save' option, including for the streaming case. Added a test for pytesting. --- asdf/commands/edit.py | 156 ++++++++++++++++++++++++++----- asdf/commands/tests/test_edit.py | 48 ++++++++-- 2 files changed, 173 insertions(+), 31 deletions(-) diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index bf4290401..1da017479 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -7,6 +7,7 @@ import os import struct import sys +import yaml import asdf.constants as constants @@ -18,6 +19,8 @@ __all__ = ["edit"] +yaml_version = None + class Edit(Command): @classmethod @@ -250,6 +253,33 @@ def open_and_check_asdf_header(fname): return fd, header_and_comment # Return GenericFile and ASDF header bytes. +def get_yaml_version(fd, token): + """ + A YAML token is found, so see if the YAML version can be parsed. + + Parameters + ---------- + fd : GenericFile + token : bytes + The YAML token + """ + global yaml_version + offset = fd.tell() + while True: + c = fd.read(1) + token += c + if b"\n" == c: + break + fd.seek(offset) + + # Expects a string looking like '%YAML X.X' + line = token.decode("utf-8").strip() + sl = line.split(" ") + if len(sl) == 2: + yaml_version = tuple([int(x) for x in sl[1].split(".")]) + + + def read_and_validate_yaml(fd, fname, validate_yaml): """ Get the YAML text from an ASDF formatted file. @@ -258,7 +288,7 @@ def read_and_validate_yaml(fd, fname, validate_yaml): ---------- fname : str Input file name - fd : GenericFile + fd : GenericFile for fname. Return @@ -272,6 +302,9 @@ def read_and_validate_yaml(fd, fname, validate_yaml): print(f"Error: No YAML in '{fname}'") sys.exit(0) + if validate_yaml: + get_yaml_version(fd, token) + # Get YAML reader and content reader = fd.reader_until( constants.YAML_END_MARKER_REGEX, @@ -350,18 +383,26 @@ def write_block_index(fd, index): fd - The output file to write the block index. index - A list of locations for each block. """ + global yaml_version if len(index) < 1: return - # TODO - this needs to be changed to use constants.py and pyyaml - bindex_hdr = b"#ASDF BLOCK INDEX\n%YAML 1.1\n---\n" - fd.write(bindex_hdr) - for idx in index: - ostr = f"- {idx}\n" - fd.write(ostr.encode("utf-8")) - end = b"..." - fd.write(end) - return + fd.write(constants.INDEX_HEADER) + fd.write(b"\n") + + # If no YAML version found in edited YAML force it to 1.1 + if yaml_version is None: + yaml_version = tuple([1, 1]) + yaml.dump( + index, + Dumper=yamlutil._yaml_base_dumper, + stream=fd, + explicit_start=True, + explicit_end=True, + version=yaml_version, + allow_unicode=True, + encoding="utf-8", + ) def find_first_block(fname): @@ -386,7 +427,43 @@ def find_first_block(fname): include=False, ) content_to_first_block = reader.read() - return len(content_to_first_block) + return len(content_to_first_block) + + +def get_next_binary_block_header(fd): + """ + Gets the next binary block token and length field, as well as the header. + + Parameters + ---------- + fd: file descriptor + Input ASDF file. + + Return + ------ + bytes + Binary block header + """ + min_header_sz = 48 + token_length = fd.read(6) + if not token_length.startswith(constants.BLOCK_MAGIC): + fd.seek(-6, os.SEEK_CUR) + return None + + hlen = struct.unpack(">H", token_length[4:])[0] + if hlen < min_header_sz: + print(f"Error: Invalid binary block length ({hlen}).") + print(f" Header length must be a minimum of {min_header_sz}.") + sys.exit(1) + + header = fd.read(hlen) + if len(header) != hlen: + print(f"Error: Expected to read {hlen} bytes of binary block") + print(f" header, but read only {len(header)}.") + sys.exit(1) + + return token_length + header + def copy_binary_blocks(ofd, ifd): """ @@ -399,18 +476,49 @@ def copy_binary_blocks(ofd, ifd): ifd: file descriptor Input ASDF file. """ - block_index = [] # A new block index needs to be computed. + block_index = [] # A new block index needs to be computed. + alloc_loc = 14 + chunk_sz = 1024 + block_num = 0 while True: - token_length = ifd.read(6) - if not token_length.startswith(constants.BLOCK_MAGIC): - ifd.seek(-6,os.SEEK_CUR) + header = get_next_binary_block_header(ifd) + if header is None: break + block_index.append(ofd.tell()) + + ofd.write(header) + + flags = struct.unpack(">I", header[6:10])[0] + if constants.BLOCK_FLAG_STREAMED & flags: + while True: + chunk = ifd.read(chunk_sz) + if 0==len(chunk): + return # End of file + ofd.write(chunk) + + alloc = struct.unpack(">Q", header[alloc_loc : alloc_loc + 8])[0] + while alloc >= chunk_sz: + chunk = ifd.read(chunk_sz) + if len(chunk)==0: + print("Error: Invalid reading of binary block {block_num}.") + print(" Exiting ...") + sys.exit(1) + ofd.write(chunk) + alloc -= chunk_sz + + if alloc > 0: + chunk = ifd.read(alloc) + ofd.write(chunk) + block_num += 1 + + if len(block_index) > 0: + write_block_index(ofd, block_index) def write_edited_yaml_larger(fname, oname, edited_yaml, first_block_loc): """ - The edited YAML is too large to simply overwrite the exiting YAML in an + The edited YAML is too large to simply overwrite the exiting YAML in an ASDF file, so the ASDF file needs to be rewritten. Parameters @@ -422,23 +530,23 @@ def write_edited_yaml_larger(fname, oname, edited_yaml, first_block_loc): first_block_location : the location in the ASDF file for the first binary block """ - tmp_oname = oname + "tmp" + tmp_oname = oname + ".tmp" - ifd = open(oname,"rb") + ifd = open(oname, "rb") ifd.seek(first_block_loc) - ofd = open(tmp_oname,"wb") + ofd = open(tmp_oname, "wb") ofd.write(edited_yaml) - pad_length = 10000 - padding = b'\0' * pad_length + pad_length = 10000 + padding = b"\0" * pad_length ofd.write(padding) copy_binary_blocks(ofd, ifd) ifd.close() ofd.close() - # os.replace(tmp_oname,oname) + os.replace(tmp_oname, oname) def write_edited_yaml(fname, oname, edited_yaml, first_block_loc): @@ -457,7 +565,7 @@ def write_edited_yaml(fname, oname, edited_yaml, first_block_loc): if len(edited_yaml) < first_block_loc: # The YAML in the ASDF can simply be overwritten pad_length = first_block_loc - len(edited_yaml) - padding = b'\0' * pad_length + padding = b"\0" * pad_length with open(oname, "r+b") as fd: fd.write(edited_yaml) fd.write(padding) @@ -466,7 +574,7 @@ def write_edited_yaml(fname, oname, edited_yaml, first_block_loc): with open(oname, "r+b") as fd: fd.write(edited_yaml) else: - write_edited_yaml_larger(fname, oname, edited_yaml, first_block_loc) + write_edited_yaml_larger(fname, oname, edited_yaml, first_block_loc) def save_func(fname, oname): diff --git a/asdf/commands/tests/test_edit.py b/asdf/commands/tests/test_edit.py index c2e4fb7b6..7a39afcef 100644 --- a/asdf/commands/tests/test_edit.py +++ b/asdf/commands/tests/test_edit.py @@ -8,6 +8,20 @@ from asdf.commands import main +def _create_base_asdf_stream(version, oname): + # Store the data in an arbitrarily nested dictionary + tree = { + "foo": 42, + "name": "Monty", + "my_stream": asdf.Stream([128], np.float64), + } + af = asdf.AsdfFile(tree) + with open(oname, "wb") as fd: + af.write_to(fd) + for k in range(5): + fd.write(np.array([k] * 128, np.float64).tobytes()) + + def _create_base_asdf(version, oname): """ In the test temp directory, create a base ASDF file to edit @@ -34,14 +48,18 @@ def _create_edited_yaml(base_yaml, edited_yaml, pattern, replacement): fd.write(new_content) -def _initialize_test(tmpdir, version): +def _initialize_test(tmpdir, version, create_asdf): asdf_base = os.path.join(tmpdir, f"base.asdf") yaml_base = os.path.join(tmpdir, f"base.yaml") asdf_edit = os.path.join(tmpdir, f"edit.asdf") yaml_edit = os.path.join(tmpdir, f"edit.yaml") + """ _create_base_asdf(version, asdf_base) _create_base_asdf(version, asdf_edit) + """ + create_asdf(version, asdf_base) + create_asdf(version, asdf_edit) args = ["edit", "-e", "-f", f"{asdf_base}", "-o", f"{yaml_base}"] main.main_from_args(args) @@ -51,7 +69,9 @@ def _initialize_test(tmpdir, version): @pytest.mark.parametrize("version", asdf.versioning.supported_versions) def test_edit_smaller(tmpdir, version): - asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test(tmpdir, version) + asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test( + tmpdir, version, _create_base_asdf + ) _create_edited_yaml(yaml_base, yaml_edit, b"foo: 42", b"foo: 2") @@ -65,7 +85,9 @@ def test_edit_smaller(tmpdir, version): @pytest.mark.parametrize("version", asdf.versioning.supported_versions) def test_edit_equal(tmpdir, version): - asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test(tmpdir, version) + asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test( + tmpdir, version, _create_base_asdf + ) _create_edited_yaml(yaml_base, yaml_edit, b"foo: 42", b"foo: 41") @@ -77,11 +99,10 @@ def test_edit_equal(tmpdir, version): assert af.tree["foo"] == 41 -""" @pytest.mark.parametrize("version", asdf.versioning.supported_versions) def test_edit_larger(tmpdir, version): asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test( - tmpdir, version + tmpdir, version, _create_base_asdf ) _create_edited_yaml(yaml_base, yaml_edit, b"foo: 42", b"foo: 42\nbar: 13") @@ -93,5 +114,18 @@ def test_edit_larger(tmpdir, version): with asdf.open(asdf_edit) as af: assert "bar" in af.tree -#TODO - Test stream -""" + +@pytest.mark.parametrize("version", asdf.versioning.supported_versions) +def test_edit_larger_stream(tmpdir, version): + asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test( + tmpdir, version, _create_base_asdf_stream + ) + + _create_edited_yaml(yaml_base, yaml_edit, b"foo: 42", b"foo: 42\nbar: 13") + + args = ["edit", "-s", "-f", f"{yaml_edit}", "-o", f"{asdf_edit}"] + main.main_from_args(args) + assert os.path.getsize(asdf_edit) - os.path.getsize(asdf_base) > 10000 + + with asdf.open(asdf_edit) as af: + assert "bar" in af.tree From 0ab696a523a57fd66578b55b929f2232526a2760 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Mon, 5 Oct 2020 13:31:25 -0400 Subject: [PATCH 43/47] Changed docstrings to conform with standard formatting. Removed extraneous comments. --- asdf/commands/edit.py | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index 1da017479..930d09442 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -63,18 +63,6 @@ def setup_arguments(cls, subparsers): help="Output file (YAML for -e option, ASDF for -s option)", ) - """ - # Validate input YAML. Optional, since this could be what's being corrected. - parser.add_argument( - "--validate", - "-v", - type=bool, - action="store_true", - dest="validate", - help="Validate input YAML format.", - ) - """ - # The edit is either being performed or saved group = parser.add_mutually_exclusive_group(required=True) @@ -380,8 +368,9 @@ def write_block_index(fd, index): Parameters ---------- - fd - The output file to write the block index. - index - A list of locations for each block. + fd : file descriptor + index : list + Integer location for each block. """ global yaml_version if len(index) < 1: @@ -526,9 +515,10 @@ def write_edited_yaml_larger(fname, oname, edited_yaml, first_block_loc): oname : str Input ASDF file name. edited_yaml : byte string - The edited YAML to be saved to an ASDF file - first_block_location : the location in the ASDF file for the first binary - block + The edited YAML to be saved to an ASDF file. + first_block_location : int + The location in the ASDF file for the first binary block. + """ tmp_oname = oname + ".tmp" @@ -559,8 +549,8 @@ def write_edited_yaml(fname, oname, edited_yaml, first_block_loc): Input ASDF file name. edited_yaml : byte string The edited YAML to be saved to an ASDF file - first_block_location : the location in the ASDF file for the first binary - block + first_block_location : int + The location in the ASDF file for the first binary block. """ if len(edited_yaml) < first_block_loc: # The YAML in the ASDF can simply be overwritten From 27e23705c0f7d6fdc424a80423568eb4beb01e3d Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Mon, 5 Oct 2020 13:38:59 -0400 Subject: [PATCH 44/47] Format changes based on 'black' tool. --- asdf/commands/edit.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index 930d09442..23b51606f 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -243,7 +243,7 @@ def open_and_check_asdf_header(fname): def get_yaml_version(fd, token): """ - A YAML token is found, so see if the YAML version can be parsed. + A YAML token is found, so see if the YAML version can be parsed. Parameters ---------- @@ -267,7 +267,6 @@ def get_yaml_version(fd, token): yaml_version = tuple([int(x) for x in sl[1].split(".")]) - def read_and_validate_yaml(fd, fname, validate_yaml): """ Get the YAML text from an ASDF formatted file. @@ -482,14 +481,14 @@ def copy_binary_blocks(ofd, ifd): if constants.BLOCK_FLAG_STREAMED & flags: while True: chunk = ifd.read(chunk_sz) - if 0==len(chunk): + if 0 == len(chunk): return # End of file ofd.write(chunk) alloc = struct.unpack(">Q", header[alloc_loc : alloc_loc + 8])[0] while alloc >= chunk_sz: chunk = ifd.read(chunk_sz) - if len(chunk)==0: + if len(chunk) == 0: print("Error: Invalid reading of binary block {block_num}.") print(" Exiting ...") sys.exit(1) @@ -518,7 +517,7 @@ def write_edited_yaml_larger(fname, oname, edited_yaml, first_block_loc): The edited YAML to be saved to an ASDF file. first_block_location : int The location in the ASDF file for the first binary block. - + """ tmp_oname = oname + ".tmp" From 4d1d726aaa25da215b661d271c63f0edd8b73af0 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Mon, 5 Oct 2020 14:24:21 -0400 Subject: [PATCH 45/47] Correcting style for formatted strings and removed extraneous comments. --- asdf/commands/tests/test_edit.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/asdf/commands/tests/test_edit.py b/asdf/commands/tests/test_edit.py index 7a39afcef..31888f530 100644 --- a/asdf/commands/tests/test_edit.py +++ b/asdf/commands/tests/test_edit.py @@ -49,15 +49,11 @@ def _create_edited_yaml(base_yaml, edited_yaml, pattern, replacement): def _initialize_test(tmpdir, version, create_asdf): - asdf_base = os.path.join(tmpdir, f"base.asdf") - yaml_base = os.path.join(tmpdir, f"base.yaml") - asdf_edit = os.path.join(tmpdir, f"edit.asdf") - yaml_edit = os.path.join(tmpdir, f"edit.yaml") + asdf_base = os.path.join(tmpdir, "base.asdf") + yaml_base = os.path.join(tmpdir, "base.yaml") + asdf_edit = os.path.join(tmpdir, "edit.asdf") + yaml_edit = os.path.join(tmpdir, "edit.yaml") - """ - _create_base_asdf(version, asdf_base) - _create_base_asdf(version, asdf_edit) - """ create_asdf(version, asdf_base) create_asdf(version, asdf_edit) From 2ebe30b337aa98a04e43a9bd84d6e97da9201bfd Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Wed, 7 Oct 2020 12:05:03 -0400 Subject: [PATCH 46/47] Removed the use of a global variable and changed the way the search for the first binary block occurs. --- asdf/commands/edit.py | 75 +++++++++++++++++++++++--------- asdf/commands/tests/test_edit.py | 2 + 2 files changed, 56 insertions(+), 21 deletions(-) diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index 23b51606f..4b9c29958 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -19,8 +19,6 @@ __all__ = ["edit"] -yaml_version = None - class Edit(Command): @classmethod @@ -250,8 +248,11 @@ def get_yaml_version(fd, token): fd : GenericFile token : bytes The YAML token + + Return + ------ + yaml_version: tuple """ - global yaml_version offset = fd.tell() while True: c = fd.read(1) @@ -261,11 +262,14 @@ def get_yaml_version(fd, token): fd.seek(offset) # Expects a string looking like '%YAML X.X' + yaml_version = None line = token.decode("utf-8").strip() sl = line.split(" ") if len(sl) == 2: yaml_version = tuple([int(x) for x in sl[1].split(".")]) + return yaml_version + def read_and_validate_yaml(fd, fname, validate_yaml): """ @@ -282,15 +286,17 @@ def read_and_validate_yaml(fd, fname, validate_yaml): ------ bytes The YAML portion of an ASDF file. + yaml_version: tuple or None """ YAML_TOKEN = b"%YAML" token = fd.read(len(YAML_TOKEN)) if token != YAML_TOKEN: print(f"Error: No YAML in '{fname}'") - sys.exit(0) + sys.exit(1) + yaml_version = None if validate_yaml: - get_yaml_version(fd, token) + yaml_version = get_yaml_version(fd, token) # Get YAML reader and content reader = fd.reader_until( @@ -312,7 +318,7 @@ def read_and_validate_yaml(fd, fname, validate_yaml): schema.validate(tree) # Failure raises an exception. - return yaml_content + return yaml_content, yaml_version def edit_func(fname, oname): @@ -339,7 +345,7 @@ def edit_func(fname, oname): fd, asdf_text = open_and_check_asdf_header(fname) # Read and validate the YAML of an ASDF file. - yaml_text = read_and_validate_yaml(fd, fname, False) + yaml_text, _ = read_and_validate_yaml(fd, fname, False) fd.close() # Write the YAML for the original ASDF file. @@ -350,7 +356,6 @@ def edit_func(fname, oname): # Output message to user. delim = "*" * 70 print(f"\n{delim}") - print("ASDF formatting and YAML schema validated.") print(f"The text portion of '{fname}' is written to:") print(f" '{oname}'") print(f"The file '{oname}' can be edited using your favorite text editor.") @@ -361,7 +366,7 @@ def edit_func(fname, oname): return -def write_block_index(fd, index): +def write_block_index(fd, index, yaml_version): """ Write the block index to an ASDF file. @@ -370,8 +375,8 @@ def write_block_index(fd, index): fd : file descriptor index : list Integer location for each block. + yaml_version: tuple """ - global yaml_version if len(index) < 1: return @@ -408,14 +413,25 @@ def find_first_block(fname): Location, in bytes, of the first binary block. """ with generic_io.get_file(fname, mode="r") as fd: + # Read past possible BLOCK_MAGIC being in YAML + reader = fd.reader_until( + constants.YAML_END_MARKER_REGEX, + 7, + "End of YAML marker", + include=True, + ) + reader.read() # Read to the end of the YAML delimiter. + + # Find location of the first binary block after the end of the YAML. reader = fd.reader_until( constants.BLOCK_MAGIC, 7, "First binary block", include=False, ) - content_to_first_block = reader.read() - return len(content_to_first_block) + reader.read() # Read to the beginning of the first binary block. + binary_block_location = fd.tell() + return binary_block_location def get_next_binary_block_header(fd): @@ -453,7 +469,7 @@ def get_next_binary_block_header(fd): return token_length + header -def copy_binary_blocks(ofd, ifd): +def copy_binary_blocks(ofd, ifd, yaml_version): """ Copies the binary blocks from the input ASDF to the output ASDF. @@ -463,6 +479,7 @@ def copy_binary_blocks(ofd, ifd): Output ASDF file. ifd: file descriptor Input ASDF file. + yaml_version: tuple """ block_index = [] # A new block index needs to be computed. alloc_loc = 14 @@ -501,10 +518,10 @@ def copy_binary_blocks(ofd, ifd): block_num += 1 if len(block_index) > 0: - write_block_index(ofd, block_index) + write_block_index(ofd, block_index, yaml_version) -def write_edited_yaml_larger(fname, oname, edited_yaml, first_block_loc): +def write_edited_yaml_larger(fname, oname, edited_yaml, first_block_loc, yaml_version): """ The edited YAML is too large to simply overwrite the exiting YAML in an ASDF file, so the ASDF file needs to be rewritten. @@ -517,7 +534,7 @@ def write_edited_yaml_larger(fname, oname, edited_yaml, first_block_loc): The edited YAML to be saved to an ASDF file. first_block_location : int The location in the ASDF file for the first binary block. - + yaml_version: tuple """ tmp_oname = oname + ".tmp" @@ -531,14 +548,14 @@ def write_edited_yaml_larger(fname, oname, edited_yaml, first_block_loc): padding = b"\0" * pad_length ofd.write(padding) - copy_binary_blocks(ofd, ifd) + copy_binary_blocks(ofd, ifd, yaml_version) ifd.close() ofd.close() os.replace(tmp_oname, oname) -def write_edited_yaml(fname, oname, edited_yaml, first_block_loc): +def write_edited_yaml(fname, oname, edited_yaml, first_block_loc, yaml_version): """ Write the edited YAML is to an existing ASDF file. @@ -550,7 +567,9 @@ def write_edited_yaml(fname, oname, edited_yaml, first_block_loc): The edited YAML to be saved to an ASDF file first_block_location : int The location in the ASDF file for the first binary block. + yaml_version: tuple """ + padded = False if len(edited_yaml) < first_block_loc: # The YAML in the ASDF can simply be overwritten pad_length = first_block_loc - len(edited_yaml) @@ -563,7 +582,21 @@ def write_edited_yaml(fname, oname, edited_yaml, first_block_loc): with open(oname, "r+b") as fd: fd.write(edited_yaml) else: - write_edited_yaml_larger(fname, oname, edited_yaml, first_block_loc) + padded = True + write_edited_yaml_larger( + fname, oname, edited_yaml, first_block_loc, yaml_version + ) + + delim = "*" * 70 + print(f"\n{delim}") + print("The edited YAML was validated and written to:") + print(f" '{oname}'") + if padded: + print("The edited YAML was too large to simply overwrite in place, so the") + print("ASDF file was rewritten with 10,000 characters of padding added.") + else: + print("The YAML in the ASDF file was overwritten in place.") + print(f"{delim}\n") def save_func(fname, oname): @@ -593,12 +626,12 @@ def save_func(fname, oname): # Validate input file is an ASDF formatted YAML. ifd, iasdf_text = open_and_check_asdf_header(fname) - iyaml_text = read_and_validate_yaml(ifd, fname, True) + iyaml_text, yaml_version = read_and_validate_yaml(ifd, fname, True) ifd.close() edited_text = iasdf_text + iyaml_text loc = find_first_block(oname) - write_edited_yaml(fname, oname, edited_text, loc) + write_edited_yaml(fname, oname, edited_text, loc, yaml_version) def edit(args): diff --git a/asdf/commands/tests/test_edit.py b/asdf/commands/tests/test_edit.py index 31888f530..4fb0588fd 100644 --- a/asdf/commands/tests/test_edit.py +++ b/asdf/commands/tests/test_edit.py @@ -109,6 +109,7 @@ def test_edit_larger(tmpdir, version): with asdf.open(asdf_edit) as af: assert "bar" in af.tree + assert af.tree["bar"] == 13 @pytest.mark.parametrize("version", asdf.versioning.supported_versions) @@ -125,3 +126,4 @@ def test_edit_larger_stream(tmpdir, version): with asdf.open(asdf_edit) as af: assert "bar" in af.tree + assert af.tree["bar"] == 13 From 5620cac3fa0e127b077e6fa9ed7241cc11bc39f1 Mon Sep 17 00:00:00 2001 From: Ken MacDonald Date: Fri, 9 Oct 2020 14:00:42 -0400 Subject: [PATCH 47/47] Updating the edit subcommand to handle trying to edit ASDF file containing no binary blocks. Added tests for these conditions. --- asdf/commands/edit.py | 57 ++++++++++++++++++-- asdf/commands/tests/test_edit.py | 92 +++++++++++++++++++++++++++++++- 2 files changed, 143 insertions(+), 6 deletions(-) diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index 4b9c29958..51874af5d 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -271,6 +271,32 @@ def get_yaml_version(fd, token): return yaml_version +def binary_block_exists(fd): + """ + Checks to see if there is a binary block. + + Parameters + ---------- + fd : GenericFile + """ + offset = fd.tell() + + # Find location of the first binary block after the end of the YAML. + reader = fd.reader_until( + constants.BLOCK_MAGIC, + 7, + "First binary block", + include=False, + ) + try: + reader.read() # Read to the beginning of the first binary block. + fd.seek(offset) + return True + except ValueError: + fd.seek(offset) + return False + + def read_and_validate_yaml(fd, fname, validate_yaml): """ Get the YAML text from an ASDF formatted file. @@ -281,6 +307,7 @@ def read_and_validate_yaml(fd, fname, validate_yaml): Input file name fd : GenericFile for fname. + validate_yaml: bool Return ------ @@ -308,6 +335,17 @@ def read_and_validate_yaml(fd, fname, validate_yaml): ) yaml_content = reader.read() + # YAML validation implies we are reading from a normal YAML file, so + # should not have any binary blocks. + if not validate_yaml and not binary_block_exists(fd): + delim = "!" * 70 + print(delim) + print(f"No binary blocks exist in {fname}. This ASDF file can") + print("directly edited in any text file. Or the file is poorly") + print("formatted and cannot be corrected with this tool.") + print(delim) + sys.exit(1) + if validate_yaml: # Create a YAML tree to validate # The YAML text must be converted to a stream. @@ -335,11 +373,11 @@ def edit_func(fname, oname): Output YAML file name. """ if not is_valid_asdf_path(fname): - return False + return 1 if not is_yaml_file(oname): print("A YAML file is expected, with '.yaml' or '.yml' extension.") - sys.exit(1) + return 1 # Validate input file is an ASDF file. fd, asdf_text = open_and_check_asdf_header(fname) @@ -429,7 +467,16 @@ def find_first_block(fname): "First binary block", include=False, ) - reader.read() # Read to the beginning of the first binary block. + try: + reader.read() # Read to the beginning of the first binary block. + except ValueError: + delim = "!" * 50 + print(delim) + print(f"No binary blocks are found in {fname}.") + print("The file should be directly edited in a standard text editor") + print("without use of this tool.") + print(delim) + sys.exit(1) binary_block_location = fd.tell() return binary_block_location @@ -619,10 +666,10 @@ def save_func(fname, oname): """ if not is_valid_yaml_path(fname): - return False + return 1 if not is_valid_asdf_path(oname): - return False + return 1 # Validate input file is an ASDF formatted YAML. ifd, iasdf_text = open_and_check_asdf_header(fname) diff --git a/asdf/commands/tests/test_edit.py b/asdf/commands/tests/test_edit.py index 4fb0588fd..85596130e 100644 --- a/asdf/commands/tests/test_edit.py +++ b/asdf/commands/tests/test_edit.py @@ -27,7 +27,7 @@ def _create_base_asdf(version, oname): In the test temp directory, create a base ASDF file to edit and test against. """ - seq = np.arange(100) + seq = np.arange(713) # Store the data in an arbitrarily nested dictionary tree = { @@ -40,6 +40,16 @@ def _create_base_asdf(version, oname): af.write_to(oname) +def _create_base_asdf_no_blocks(version, oname): + tree = { + "foo": 42, + "name": "Monty", + } + + with asdf.AsdfFile(tree, version=version) as af: + af.write_to(oname) + + def _create_edited_yaml(base_yaml, edited_yaml, pattern, replacement): with open(base_yaml, "rb") as fd: content = fd.read() @@ -63,6 +73,86 @@ def _initialize_test(tmpdir, version, create_asdf): return asdf_base, yaml_base, asdf_edit, yaml_edit +# --------------- Tests --------------- +@pytest.mark.parametrize("version", asdf.versioning.supported_versions) +def test_edit_tack_s_bad_args_bad_f(tmpdir, version): + asdf_base = os.path.join(tmpdir, "base.asdf") + + args = ["edit", "-e", "-f", f"{asdf_base}", "-o", f"{asdf_base}"] + with pytest.raises(SystemExit) as e: + main.main(args) + assert e.value.code == 1 + + +@pytest.mark.parametrize("version", asdf.versioning.supported_versions) +def test_edit_tack_s_bad_args_bad_o(tmpdir, version): + yaml_base = os.path.join(tmpdir, "base.yaml") + + args = ["edit", "-e", "-f", f"{yaml_base}", "-o", f"{yaml_base}"] + with pytest.raises(SystemExit) as e: + main.main(args) + assert e.value.code == 1 + + +@pytest.mark.parametrize("version", asdf.versioning.supported_versions) +def test_edit_tack_e_bad_args_bad_o(tmpdir, version): + asdf_base = os.path.join(tmpdir, "base.asdf") + + args = ["edit", "-e", "-f", f"{asdf_base}", "-o", f"{asdf_base}"] + with pytest.raises(SystemExit) as e: + main.main(args) + assert e.value.code == 1 + + +@pytest.mark.parametrize("version", asdf.versioning.supported_versions) +def test_edit_tack_e_bad_args_bad_f(tmpdir, version): + yaml_base = os.path.join(tmpdir, "base.yaml") + + args = ["edit", "-e", "-f", f"{yaml_base}", "-o", f"{yaml_base}"] + with pytest.raises(SystemExit) as e: + main.main(args) + assert e.value.code == 1 + + +@pytest.mark.parametrize("version", asdf.versioning.supported_versions) +def test_edit_bad_args(tmpdir, version): + asdf_base = os.path.join(tmpdir, "base.asdf") + + args = ["edit", "-e", "-f", f"{asdf_base}"] + with pytest.raises(SystemExit) as e: + main.main(args) + assert e.value.code == 2 + + +@pytest.mark.parametrize("version", asdf.versioning.supported_versions) +def test_edit_save_to_no_binary(tmpdir, version): + asdf_base = os.path.join(tmpdir, "base.asdf") + yaml_base = os.path.join(tmpdir, "base.yaml") + yaml_edit = os.path.join(tmpdir, "edit.yaml") + + _create_base_asdf_no_blocks(version, asdf_base) + _create_base_asdf_no_blocks(version, yaml_base) + + _create_edited_yaml(yaml_base, yaml_edit, b"foo: 42", b"foo: 2") + + args = ["edit", "-s", "-f", f"{yaml_edit}", "-o", f"{asdf_base}"] + with pytest.raises(SystemExit) as e: + main.main(args) + assert e.value.code == 1 + + +@pytest.mark.parametrize("version", asdf.versioning.supported_versions) +def test_edit_no_binary(tmpdir, version): + asdf_base = os.path.join(tmpdir, "base.asdf") + yaml_base = os.path.join(tmpdir, "base.yaml") + _create_base_asdf_no_blocks(version, asdf_base) + + args = ["edit", "-e", "-f", f"{asdf_base}", "-o", f"{yaml_base}"] + with pytest.raises(SystemExit) as e: + main.main(args) + assert e.value.code == 1 + + @pytest.mark.parametrize("version", asdf.versioning.supported_versions) def test_edit_smaller(tmpdir, version): asdf_base, yaml_base, asdf_edit, yaml_edit = _initialize_test(