diff --git a/ci/actions/run_tests/entrypoint.sh b/ci/actions/run_tests/entrypoint.sh index a854f82dfe..f34d3040e8 100644 --- a/ci/actions/run_tests/entrypoint.sh +++ b/ci/actions/run_tests/entrypoint.sh @@ -48,7 +48,7 @@ fi # install Pillow library needed for diff testing # this will be replaced with better image diffing package used by METplotpy -pip_command="pip3 install Pillow" +pip_command="pip3 install Pillow; yum -y install poppler-utils; pip3 install pdf2image" # build command to run command="./ci/jobs/run_use_cases.py ${CATEGORIES} ${SUBSETLIST}" diff --git a/ci/jobs/run_use_cases.py b/ci/jobs/run_use_cases.py index 1ca795c1a3..80db5f8c59 100755 --- a/ci/jobs/run_use_cases.py +++ b/ci/jobs/run_use_cases.py @@ -69,13 +69,16 @@ def copy_diff_output(diff_files): and file path of output that was just generated. Either tuple value may be an empty string if the file was not found. """ - for truth_file, out_file, _ in diff_files: + for truth_file, out_file, _, diff_file in diff_files: if truth_file: copy_to_diff_dir(truth_file, 'truth') if out_file: copy_to_diff_dir(out_file, 'output') + if diff_file: + copy_to_diff_dir(diff_file, + 'diff') def copy_to_diff_dir(file_path, data_type): """! Generate output path based on input file path, @@ -97,8 +100,12 @@ def copy_to_diff_dir(file_path, data_type): diff_out = file_path.replace(data_dir, DIFF_DIR) # add data type identifier to filename before extension - output_path, extension = os.path.splitext(diff_out) - output_path = f'{output_path}_{data_type}{extension}' + # if data is not difference output + if data_type == 'diff': + output_path = diff_out + else: + output_path, extension = os.path.splitext(diff_out) + output_path = f'{output_path}_{data_type}{extension}' # create output directory if it doesn't exist output_dir = os.path.dirname(output_path) @@ -143,7 +150,9 @@ def main(): if compare and isOK: print('******************************') print("Comparing output to truth data") - diff_files = compare_dir(TRUTH_DIR, OUTPUT_DIR, debug=True) + diff_files = compare_dir(TRUTH_DIR, OUTPUT_DIR, + debug=True, + save_diff=True) if diff_files: isOK = False diff --git a/ci/util/diff_util.py b/ci/util/diff_util.py index 9fd79c283a..05ea986f91 100644 --- a/ci/util/diff_util.py +++ b/ci/util/diff_util.py @@ -19,10 +19,13 @@ '.zip', ] -UNSUPPORTED_EXTENSIONS = [ +PDF_EXTENSIONS = [ '.pdf', ] +UNSUPPORTED_EXTENSIONS = [ +] + def get_file_type(filepath): _, file_extension = os.path.splitext(filepath) if file_extension in IMAGE_EXTENSIONS: @@ -43,15 +46,18 @@ def get_file_type(filepath): if file_extension in SKIP_EXTENSIONS: return 'skip' + if file_extension in PDF_EXTENSIONS: + return 'pdf' + if file_extension in UNSUPPORTED_EXTENSIONS: return f'unsupported{file_extension}' return 'unknown' -def compare_dir(dir_a, dir_b, debug=False): - # if input are files and not directories, compare them +def compare_dir(dir_a, dir_b, debug=False, save_diff=False): + # if input are files and not directories, compare them if os.path.isfile(dir_a): - result = compare_files(dir_a, dir_b, debug=debug) + result = compare_files(dir_a, dir_b, debug=debug, save_diff=save_diff) if result is None or result is True: return [] @@ -83,7 +89,8 @@ def compare_dir(dir_a, dir_b, debug=False): filepath_b, debug=debug, dir_a=dir_a, - dir_b=dir_b) + dir_b=dir_b, + save_diff=save_diff) # no differences of skipped if result is None or result is True: @@ -101,14 +108,20 @@ def compare_dir(dir_a, dir_b, debug=False): filepath_b = os.path.join(root, filename) filepath_a = filepath_b.replace(dir_b, dir_a) if not os.path.exists(filepath_a): + # check if missing file is actually diff file that was generated + diff_list = [item[3] for item in diff_files] + if filepath_b in diff_list: + continue print(f"ERROR: File does not exist: {filepath_a}") - diff_files.append(('', filepath_b, 'file not found (new output)')) + diff_files.append(('', filepath_b, 'file not found (new output)', '')) print("\nSummary:\n") if diff_files: print("\nERROR: Some differences were found") - for filepath_a, filepath_b, reason in diff_files: + for filepath_a, filepath_b, reason, diff_file in diff_files: print(f"{reason}\n A:{filepath_a}\n B:{filepath_b}") + if diff_file: + print(f"Difference file: {diff_file}") else: print("\nNo differences found in any files") @@ -116,7 +129,8 @@ def compare_dir(dir_a, dir_b, debug=False): "**************************************************\n\n") return diff_files -def compare_files(filepath_a, filepath_b, debug=False, dir_a=None, dir_b=None): +def compare_files(filepath_a, filepath_b, debug=False, dir_a=None, dir_b=None, + save_diff=False): # dir_a and dir_b are only needed if comparing file lists that need those # directories to substitute when comparing because files in the list will # have different paths @@ -127,7 +141,7 @@ def compare_files(filepath_a, filepath_b, debug=False, dir_a=None, dir_b=None): if not os.path.exists(filepath_b): if debug: print(f"ERROR: File does not exist: {filepath_b}") - return (filepath_a, '', 'file not found') + return (filepath_a, '', 'file not found', '') file_type = get_file_type(filepath_a) if file_type == 'skip': @@ -136,23 +150,41 @@ def compare_files(filepath_a, filepath_b, debug=False, dir_a=None, dir_b=None): if file_type.startswith('unsupported'): print(f"Unsupported file type encountered: {file_type.split('.')[1]}") - return (filepath_a, filepath_b, file_type) + return (filepath_a, filepath_b, file_type, '') if file_type == 'netcdf': print("Comparing NetCDF") if not nc_is_equal(filepath_a, filepath_b): - return (filepath_a, filepath_b, 'NetCDF diff') + return (filepath_a, filepath_b, 'NetCDF diff', '') print("No differences in NetCDF files") return True + if file_type == 'pdf': + print("Comparing PDF as images") + diff_file = compare_pdf_as_images(filepath_a, filepath_b, + save_diff=save_diff) + if diff_file is True: + print("No differences in PDF files") + return True + + if diff_file is False: + diff_file = '' + + return (filepath_a, filepath_b, 'PDF diff', diff_file) + if file_type == 'image': print("Comparing images") - if not compare_image_files(filepath_a, filepath_b): - return (filepath_a, filepath_b, 'Image diff') + diff_file = compare_image_files(filepath_a, filepath_b, + save_diff=save_diff) + if diff_file is True: + print("No differences in image files") + return True - print("No differences in image files") - return True + if diff_file is False: + diff_file = '' + + return (filepath_a, filepath_b, 'Image diff', diff_file) # if not any of the above types, use diff to compare print("Comparing text files") @@ -160,7 +192,7 @@ def compare_files(filepath_a, filepath_b, debug=False, dir_a=None, dir_b=None): # if files differ, open files and handle expected diffs if not compare_txt_files(filepath_a, filepath_b, dir_a, dir_b): print(f"ERROR: File differs: {filepath_b}") - return (filepath_a, filepath_b, 'Text diff') + return (filepath_a, filepath_b, 'Text diff', '') print("No differences in text files") return True @@ -169,22 +201,67 @@ def compare_files(filepath_a, filepath_b, debug=False, dir_a=None, dir_b=None): return True -def compare_image_files(filepath_a, filepath_b): - diff_count = 0 +def compare_pdf_as_images(filepath_a, filepath_b, save_diff=False): + try: + from pdf2image import convert_from_path + except ModuleNotFoundError: + print("Cannot compare PDF files without pdf2image Python package") + return False + + images_a = convert_from_path(filepath_a) + images_b = convert_from_path(filepath_b) + for image_a, image_b in zip(images_a, images_b): + image_diff = compare_images(image_a, image_b) + # no differences if None, so continue to next image from PDF + if image_diff is None: + continue + + # if skipping save diff files, return False b/c there are differences + if not save_diff: + return False + + # create difference image and return the path + return save_diff_file(image_diff, filepath_b) + + return True + +def compare_image_files(filepath_a, filepath_b, save_diff=False): image_a = Image.open(filepath_a) image_b = Image.open(filepath_b) + image_diff = compare_images(image_a, image_b) + if image_diff is None: + return True + + if not save_diff: + return False + + return save_diff_file(image_diff, filepath_b) + +def compare_images(image_a, image_b): + """! Compare pillow image objects. Returns difference image object if there + are differences or None if not. + """ + diff_count = 0 image_diff = ImageChops.difference(image_a, image_b) nx, ny = image_diff.size for x in range(0, int(nx)): for y in range(0, int(ny)): pixel = image_diff.getpixel((x, y)) - if pixel != 0 and pixel != (0, 0, 0, 0): + if pixel != 0 and pixel != (0, 0, 0, 0) and pixel != (0, 0, 0): + print(f"Difference pixel: {pixel}") diff_count += 1 if diff_count: print(f"ERROR: Found {diff_count} differences between images") - return False - return True + return image_diff + return None + +def save_diff_file(image_diff, filepath_b): + rel_path, file_extension = os.path.splitext(filepath_b) + diff_file = f'{rel_path}_diff.png' + print(f"Saving diff file: {diff_file}") + image_diff.save(diff_file, "PNG") + return diff_file def compare_txt_files(filepath_a, filepath_b, dir_a=None, dir_b=None): with open(filepath_a, 'r') as file_handle: