Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature 779 compare pdf #860

Merged
merged 46 commits into from
Mar 30, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
1a275c6
add support for climo_cdf dictionary in GridStat
georgemccabe Mar 24, 2021
c8a2731
add support for climo_cdf dictionary in PointStat, ci-run-diff
georgemccabe Mar 24, 2021
a43d4ad
added support for output_flag and nc_pairs_flag in GridStat - updated…
georgemccabe Mar 24, 2021
70c1424
Merge branch 'develop' into feature_779_common_met_config_options
georgemccabe Mar 25, 2021
c2675a7
added more documentation about required arguments for function
georgemccabe Mar 25, 2021
0731862
add support for another METplus config name for climo_cdf.cdf_bins
georgemccabe Mar 25, 2021
764ced3
added utility to easily generate text that needs to be added to doc…
georgemccabe Mar 25, 2021
e1fb770
added missing header for MET configuration section documentation
georgemccabe Mar 25, 2021
b1fbd63
added other advice for adding support for new MET config variable
georgemccabe Mar 29, 2021
f55b47b
Merge branch 'develop' into feature_779_common_met_config_options
georgemccabe Mar 29, 2021
bf806be
clean up formatting so it is easier to read the output
georgemccabe Mar 29, 2021
e12eebf
moved output_flag and nc_pairs_flag values from GRID_STAT_MET_CONFIG_…
georgemccabe Mar 29, 2021
aa5405a
add env var to GridStat wrapped config, updated documentation for new…
georgemccabe Mar 29, 2021
e73fcb4
trigger diff tests with ci-run-diff
georgemccabe Mar 29, 2021
cb3c38b
changed output_flag and nc_pairs_flag settings to match previous defa…
georgemccabe Mar 29, 2021
e4294b7
ci-run-diff
georgemccabe Mar 29, 2021
4ca0bb6
fixed typo in value for DMAP
georgemccabe Mar 29, 2021
b7abcda
only copy log directory to error_logs if there is an error in the use…
georgemccabe Mar 29, 2021
8ca3e4b
try using image diff logic for PDF files
georgemccabe Mar 29, 2021
9c5a524
fix output flags to match previous settings, ci-run-diff
georgemccabe Mar 29, 2021
7dfcb72
Revert "fixed typo in value for DMAP"
georgemccabe Mar 29, 2021
24f1753
removed script that is no longer used
georgemccabe Mar 29, 2021
a10ac91
added info for checking default values in MET configs against share/m…
georgemccabe Mar 29, 2021
247f2de
rename script name to be more clear, ci-run-all-cases
georgemccabe Mar 29, 2021
c34926f
removed unused scripts
georgemccabe Mar 29, 2021
92cdfa1
added unsupported file types for diff logic so the output can be obta…
georgemccabe Mar 29, 2021
f3b632a
added comments to scripts to explain what they do and what calls them…
georgemccabe Mar 29, 2021
1aefe35
moved scripts to obtain external python requirements into a directory…
georgemccabe Mar 29, 2021
1fbfffb
change typo back to correct version since I confirmed the error logs …
georgemccabe Mar 29, 2021
667574f
updated doc util script to help generate unit tests, added tests for …
georgemccabe Mar 29, 2021
540f5e2
added support for diffing PDF files as images, ci-run-diff
georgemccabe Mar 29, 2021
086ac54
updated diff logic to convert pdf files to images to compare, added o…
georgemccabe Mar 29, 2021
3ea362e
ci-run-diff
georgemccabe Mar 29, 2021
5b021e1
install dependency of pdf2image
georgemccabe Mar 30, 2021
b5066ea
ci-run-diff
georgemccabe Mar 30, 2021
6e3424f
install poppler-utils for pdf2image, ci-run-diff
georgemccabe Mar 30, 2021
9a6ffe5
fix install of poppler
georgemccabe Mar 30, 2021
4daef23
ci-run-diff
georgemccabe Mar 30, 2021
bc96de2
use yum because the Docker image is CentOS
georgemccabe Mar 30, 2021
c9169ac
try only mounting top level data dir instead of each subdir, ci-run-diff
georgemccabe Mar 30, 2021
48ea986
change back to mounting each output directory, ci-run-diff
georgemccabe Mar 30, 2021
e685fdb
forced yum install, ci-run-diff
georgemccabe Mar 30, 2021
f391b03
skip file only found in output (not truth) if it is a diff file that …
georgemccabe Mar 30, 2021
effa5e3
merged develop and resolved conflicts
georgemccabe Mar 30, 2021
b7be3fe
fixed check for diff file, ci-run-diff
georgemccabe Mar 30, 2021
7b4a72c
fixed logic so diff file does not have _diff_diff extension
georgemccabe Mar 30, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ci/actions/run_tests/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ fi

# install Pillow library needed for diff testing
# this will be replaced with better image diffing package used by METplotpy
pip_command="pip3 install Pillow"
pip_command="pip3 install Pillow; yum -y install poppler-utils; pip3 install pdf2image"

# build command to run
command="./ci/jobs/run_use_cases.py ${CATEGORIES} ${SUBSETLIST}"
Expand Down
17 changes: 13 additions & 4 deletions ci/jobs/run_use_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,13 +69,16 @@ def copy_diff_output(diff_files):
and file path of output that was just generated. Either tuple
value may be an empty string if the file was not found.
"""
for truth_file, out_file, _ in diff_files:
for truth_file, out_file, _, diff_file in diff_files:
if truth_file:
copy_to_diff_dir(truth_file,
'truth')
if out_file:
copy_to_diff_dir(out_file,
'output')
if diff_file:
copy_to_diff_dir(diff_file,
'diff')

def copy_to_diff_dir(file_path, data_type):
"""! Generate output path based on input file path,
Expand All @@ -97,8 +100,12 @@ def copy_to_diff_dir(file_path, data_type):
diff_out = file_path.replace(data_dir, DIFF_DIR)

# add data type identifier to filename before extension
output_path, extension = os.path.splitext(diff_out)
output_path = f'{output_path}_{data_type}{extension}'
# if data is not difference output
if data_type == 'diff':
output_path = diff_out
else:
output_path, extension = os.path.splitext(diff_out)
output_path = f'{output_path}_{data_type}{extension}'

# create output directory if it doesn't exist
output_dir = os.path.dirname(output_path)
Expand Down Expand Up @@ -143,7 +150,9 @@ def main():
if compare and isOK:
print('******************************')
print("Comparing output to truth data")
diff_files = compare_dir(TRUTH_DIR, OUTPUT_DIR, debug=True)
diff_files = compare_dir(TRUTH_DIR, OUTPUT_DIR,
debug=True,
save_diff=True)
if diff_files:
isOK = False

Expand Down
119 changes: 98 additions & 21 deletions ci/util/diff_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,13 @@
'.zip',
]

UNSUPPORTED_EXTENSIONS = [
PDF_EXTENSIONS = [
'.pdf',
]

UNSUPPORTED_EXTENSIONS = [
]

def get_file_type(filepath):
_, file_extension = os.path.splitext(filepath)
if file_extension in IMAGE_EXTENSIONS:
Expand All @@ -43,15 +46,18 @@ def get_file_type(filepath):
if file_extension in SKIP_EXTENSIONS:
return 'skip'

if file_extension in PDF_EXTENSIONS:
return 'pdf'

if file_extension in UNSUPPORTED_EXTENSIONS:
return f'unsupported{file_extension}'

return 'unknown'

def compare_dir(dir_a, dir_b, debug=False):
# if input are files and not directories, compare them
def compare_dir(dir_a, dir_b, debug=False, save_diff=False):
# if input are files and not directories, compare them
if os.path.isfile(dir_a):
result = compare_files(dir_a, dir_b, debug=debug)
result = compare_files(dir_a, dir_b, debug=debug, save_diff=save_diff)
if result is None or result is True:
return []

Expand Down Expand Up @@ -83,7 +89,8 @@ def compare_dir(dir_a, dir_b, debug=False):
filepath_b,
debug=debug,
dir_a=dir_a,
dir_b=dir_b)
dir_b=dir_b,
save_diff=save_diff)

# no differences or skipped
if result is None or result is True:
Expand All @@ -101,22 +108,29 @@ def compare_dir(dir_a, dir_b, debug=False):
filepath_b = os.path.join(root, filename)
filepath_a = filepath_b.replace(dir_b, dir_a)
if not os.path.exists(filepath_a):
# check if missing file is actually diff file that was generated
diff_list = [item[3] for item in diff_files]
if filepath_b in diff_list:
continue
print(f"ERROR: File does not exist: {filepath_a}")
diff_files.append(('', filepath_b, 'file not found (new output)'))
diff_files.append(('', filepath_b, 'file not found (new output)', ''))

print("\nSummary:\n")
if diff_files:
print("\nERROR: Some differences were found")
for filepath_a, filepath_b, reason in diff_files:
for filepath_a, filepath_b, reason, diff_file in diff_files:
print(f"{reason}\n A:{filepath_a}\n B:{filepath_b}")
if diff_file:
print(f"Difference file: {diff_file}")
else:
print("\nNo differences found in any files")

print("Finished comparing directories\n"
"**************************************************\n\n")
return diff_files

def compare_files(filepath_a, filepath_b, debug=False, dir_a=None, dir_b=None):
def compare_files(filepath_a, filepath_b, debug=False, dir_a=None, dir_b=None,
save_diff=False):
# dir_a and dir_b are only needed if comparing file lists that need those
# directories to substitute when comparing because files in the list will
# have different paths
Expand All @@ -127,7 +141,7 @@ def compare_files(filepath_a, filepath_b, debug=False, dir_a=None, dir_b=None):
if not os.path.exists(filepath_b):
if debug:
print(f"ERROR: File does not exist: {filepath_b}")
return (filepath_a, '', 'file not found')
return (filepath_a, '', 'file not found', '')

file_type = get_file_type(filepath_a)
if file_type == 'skip':
Expand All @@ -136,31 +150,49 @@ def compare_files(filepath_a, filepath_b, debug=False, dir_a=None, dir_b=None):

if file_type.startswith('unsupported'):
print(f"Unsupported file type encountered: {file_type.split('.')[1]}")
return (filepath_a, filepath_b, file_type)
return (filepath_a, filepath_b, file_type, '')

if file_type == 'netcdf':
print("Comparing NetCDF")
if not nc_is_equal(filepath_a, filepath_b):
return (filepath_a, filepath_b, 'NetCDF diff')
return (filepath_a, filepath_b, 'NetCDF diff', '')

print("No differences in NetCDF files")
return True

if file_type == 'pdf':
print("Comparing PDF as images")
diff_file = compare_pdf_as_images(filepath_a, filepath_b,
save_diff=save_diff)
if diff_file is True:
print("No differences in PDF files")
return True

if diff_file is False:
diff_file = ''

return (filepath_a, filepath_b, 'PDF diff', diff_file)

if file_type == 'image':
print("Comparing images")
if not compare_image_files(filepath_a, filepath_b):
return (filepath_a, filepath_b, 'Image diff')
diff_file = compare_image_files(filepath_a, filepath_b,
save_diff=save_diff)
if diff_file is True:
print("No differences in image files")
return True

print("No differences in image files")
return True
if diff_file is False:
diff_file = ''

return (filepath_a, filepath_b, 'Image diff', diff_file)

# if not any of the above types, use diff to compare
print("Comparing text files")
if not filecmp.cmp(filepath_a, filepath_b):
# if files differ, open files and handle expected diffs
if not compare_txt_files(filepath_a, filepath_b, dir_a, dir_b):
print(f"ERROR: File differs: {filepath_b}")
return (filepath_a, filepath_b, 'Text diff')
return (filepath_a, filepath_b, 'Text diff', '')

print("No differences in text files")
return True
Expand All @@ -169,22 +201,67 @@ def compare_files(filepath_a, filepath_b, debug=False, dir_a=None, dir_b=None):

return True

def compare_image_files(filepath_a, filepath_b):
diff_count = 0
def compare_pdf_as_images(filepath_a, filepath_b, save_diff=False):
    """! Compare two PDF files by converting their pages to images and
    comparing the pages pairwise.

    @param filepath_a path to the first PDF file
    @param filepath_b path to the second PDF file
    @param save_diff if True, save a difference image for the first page
     that differs and return the path of that image
    @returns True if no differences were found, False if the files differ
     (or the pdf2image package is not available) and no difference image
     was saved, or the path of the saved difference image
    """
    # imported here so the rest of the diff logic still works when the
    # optional pdf2image package is not installed
    try:
        from pdf2image import convert_from_path
    except ModuleNotFoundError:
        print("Cannot compare PDF files without pdf2image Python package")
        return False

    images_a = convert_from_path(filepath_a)
    images_b = convert_from_path(filepath_b)
    for image_a, image_b in zip(images_a, images_b):
        image_diff = compare_images(image_a, image_b)

        # no differences if None, so continue to next image from PDF
        if image_diff is None:
            continue

        # if skipping save diff files, return False b/c there are differences
        if not save_diff:
            return False

        # create difference image and return the path
        return save_diff_file(image_diff, filepath_b)

    # zip() stops at the shorter list, so pages that only exist in one of the
    # files were never compared -- a page count mismatch is a difference
    if len(images_a) != len(images_b):
        print("ERROR: PDF files have a different number of pages: "
              f"{len(images_a)} vs {len(images_b)}")
        return False

    return True

def compare_image_files(filepath_a, filepath_b, save_diff=False):
    """! Open two image files and compare them with compare_images.

    @param filepath_a path to the first image file
    @param filepath_b path to the second image file
    @param save_diff if True, save a difference image when the files differ
     and return the path of that image
    @returns True if no differences were found, False if the images differ
     and no difference image was saved, or the path of the saved difference
     image
    """
    # open both files with context managers so the file handles are released
    # as soon as the comparison is finished instead of staying open
    with Image.open(filepath_a) as image_a, Image.open(filepath_b) as image_b:
        image_diff = compare_images(image_a, image_b)

    if image_diff is None:
        return True

    if not save_diff:
        return False

    return save_diff_file(image_diff, filepath_b)

def compare_images(image_a, image_b):
    """! Compare pillow image objects. Returns difference image object if there
    are differences or None if not.

    @param image_a first pillow image object
    @param image_b second pillow image object
    @returns difference image object if any pixel differs or the image sizes
     differ, otherwise None
    """
    image_diff = ImageChops.difference(image_a, image_b)

    # NOTE(review): ImageChops.difference appears to cover only the region
    # common to both images, so a size mismatch is reported explicitly here
    # instead of relying on the pixel check -- confirm against Pillow docs
    size_differs = image_a.size != image_b.size
    if size_differs:
        print(f"ERROR: Image sizes differ: {image_a.size} vs {image_b.size}")

    diff_count = 0
    nx, ny = image_diff.size
    for x in range(nx):
        for y in range(ny):
            pixel = image_diff.getpixel((x, y))
            # multi-band images give a tuple per pixel and single-band images
            # give a number; pixels match only if every band difference is 0
            bands = pixel if isinstance(pixel, tuple) else (pixel,)
            if any(bands):
                print(f"Difference pixel: {pixel}")
                diff_count += 1

    if diff_count:
        print(f"ERROR: Found {diff_count} differences between images")

    if diff_count or size_differs:
        return image_diff
    return None

def save_diff_file(image_diff, filepath_b):
    """! Write a difference image as a PNG file next to the given file.

    The output name is the path of filepath_b with its extension replaced
    by _diff.png.

    @param image_diff image object to save (its save method is called)
    @param filepath_b path of the file that was compared, used to build the
     name of the difference file
    @returns path of the difference file that was written
    """
    base_path = os.path.splitext(filepath_b)[0]
    diff_path = base_path + '_diff.png'
    print(f"Saving diff file: {diff_path}")
    image_diff.save(diff_path, "PNG")
    return diff_path

def compare_txt_files(filepath_a, filepath_b, dir_a=None, dir_b=None):
with open(filepath_a, 'r') as file_handle:
Expand Down