From 65ad6ae01b54e7173b62eed910d1a643fc4ddaa8 Mon Sep 17 00:00:00 2001 From: Anushlinux Date: Thu, 3 Oct 2024 20:00:48 +0530 Subject: [PATCH 1/2] Update image conversion script to support multiple formats --- scripts/hooks/convert_images_hook.py | 111 ++++++++++++++++++--------- 1 file changed, 73 insertions(+), 38 deletions(-) diff --git a/scripts/hooks/convert_images_hook.py b/scripts/hooks/convert_images_hook.py index 52470361..dcbeb189 100755 --- a/scripts/hooks/convert_images_hook.py +++ b/scripts/hooks/convert_images_hook.py @@ -1,41 +1,56 @@ #!/usr/bin/env python3 -"""Convert png images within the repository.""" - +"""Convert png and other images within the repository.""" import argparse import os +import sys +from PIL import Image -from scripts.utils.image_utils import convert_image, get_size_in_kb, get_size_reduction +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from utils.image_utils import get_size_in_kb, get_size_reduction +def convert_image(input_path, output_path, output_format): + with Image.open(input_path) as img: + if output_format == 'JPG': + output_format = 'JPEG' + img.save(output_path, output_format) -def convert_images_in_tree(args): - filenames = args.get("filenames", None) - trigger_size = args.get("trigger_size", None) +def bulk_convert(input_dir, output_dir, output_format, trigger_size): + os.makedirs(output_dir, exist_ok=True) converted_count = 0 - for image_path in filenames: - old_size = get_size_in_kb(image_path) - if old_size <= trigger_size: - continue - - # Note: the pre-commit hook takes care of ensuring only image files are passed here. - new_image_path = convert_image(image_path) - new_size = get_size_in_kb(new_image_path) - if new_size <= old_size: - print( - f"Converted png to jpg: {image_path}: {new_size:.2f}KB {get_size_reduction(old_size, new_size)}" - ) - converted_count += 1 - else: - print( - f"Skipping conversion for {image_path} as size is more than before ({new_size:.2f} KB > {old_size:.2f} KB)" - ) - os.remove(new_image_path) - return converted_count + for root, _, files in os.walk(input_dir): + for filename in files: + input_path = os.path.join(root, filename) + if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.tif')): + old_size = get_size_in_kb(input_path) + + name, ext = os.path.splitext(filename) + output_path = os.path.join(output_dir, f"{name}.{output_format.lower()}") + + convert_image(input_path, output_path, output_format.upper()) + new_size = get_size_in_kb(output_path) + + if old_size > trigger_size and new_size <= old_size: + print( + f"Converted {filename} to {output_format.upper()}: {new_size:.2f}KB " + f"{get_size_reduction(old_size, new_size)}" + ) + converted_count += 1 + elif old_size <= trigger_size: + print( + f"Converted {filename} to {output_format.upper()}: {new_size:.2f}KB" + ) + else: + print( + f"Skipping conversion for {filename} as size increased " + f"({new_size:.2f} KB > {old_size:.2f} KB)" + ) + # os.remove(output_path) + return converted_count def parse_args(): - # construct the argument parse and parse the arguments argparser = argparse.ArgumentParser() argparser.add_argument( @@ -46,31 +61,51 @@ def parse_args(): dest="trigger_size", help="Specify minimum file size to trigger the hook.", ) + argparser.add_argument( + "--input-dir", + default=None, + required=False, + help="Specify the input directory for bulk conversion.", + ) + argparser.add_argument( + "--output-dir", + default=None, + required=True, + help="Specify the output directory for converted files.", + ) + argparser.add_argument( + "--format", + choices=['jpg', 'jpeg', 'png'], + default='jpeg', + help="Specify the output format (default: jpeg).", + ) argparser.add_argument("filenames", nargs="*", help="Files to optimize.") - ( - args, - unknown, - ) = argparser.parse_known_args() - - args = vars(args) + args, unknown = argparser.parse_known_args() if len(unknown) > 0: argparser.print_help() raise Exception(f"\nError: Unknown arguments: {unknown}") - return args - + return vars(args) if __name__ == "__main__": args = parse_args() - converted_count = convert_images_in_tree(args) trigger_size = args["trigger_size"] + output_dir = args["output_dir"] + output_format = args["format"] + + if args.get("input_dir"): + converted_count = bulk_convert(args["input_dir"], output_dir, output_format, trigger_size) + else: + print("No input directory specified. Please provide an input directory.") + exit(1) + if converted_count > 0: print( - f"Note: {converted_count} png images above {trigger_size}KB were converted to jpg.\nPlease manually remove the png files and add your commit again." + f"Note: {converted_count} images above {trigger_size}KB were converted to {output_format.upper()}.\n" ) exit(1) else: - # print("All sample images are jpgs. Commit accepted.") - exit(0) + print("All images are optimized. Commit accepted.") + exit(0) \ No newline at end of file From 84ddb531b2275a806ba131294f359ff010d958c9 Mon Sep 17 00:00:00 2001 From: Anushlinux Date: Fri, 4 Oct 2024 15:01:01 +0530 Subject: [PATCH 2/2] changes on the basis of hooks and bulk conversion --- .pre-commit-config.yaml | 2 +- scripts/hooks/convert_images_hook.py | 113 +++++++------------ scripts/local/convert_images.py | 67 ++++-------- scripts/local/utils/bulk_ops_common.py | 143 +++++++++++++++++-------- 4 files changed, 166 insertions(+), 159 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8babe332..7008751c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,7 +7,7 @@ repos: - id: convert-images name: Convert png to jpg in samples entry: ./scripts/run_python_hook.sh scripts/hooks/convert_images_hook.py - args: ["--trigger-size", "150"] + args: ["--format", "jpg"] files: ^.*\.(png|PNG)$ pass_filenames: true stages: [commit] diff --git a/scripts/hooks/convert_images_hook.py b/scripts/hooks/convert_images_hook.py index dcbeb189..9fdf5b00 100755 --- a/scripts/hooks/convert_images_hook.py +++ b/scripts/hooks/convert_images_hook.py @@ -1,56 +1,45 @@ #!/usr/bin/env python3 -"""Convert png and other images within the repository.""" +"""Convert png images within the repository.""" + import argparse import os -import sys -from PIL import Image -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from utils.image_utils import get_size_in_kb, get_size_reduction +import argparse +import os +from scripts.local.utils.bulk_ops_common import convert_image, convert_pdf_to_jpg -def convert_image(input_path, output_path, output_format): - with Image.open(input_path) as img: - if output_format == 'JPG': - output_format = 'JPEG' - img.save(output_path, output_format) +from scripts.utils.image_utils import convert_image, get_size_in_kb, get_size_reduction -def bulk_convert(input_dir, output_dir, output_format, trigger_size): - os.makedirs(output_dir, exist_ok=True) - converted_count = 0 - for root, _, files in os.walk(input_dir): - for filename in files: - input_path = os.path.join(root, filename) - if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.tif')): - old_size = get_size_in_kb(input_path) - - name, ext = os.path.splitext(filename) - output_path = os.path.join(output_dir, f"{name}.{output_format.lower()}") - - convert_image(input_path, output_path, output_format.upper()) - new_size = get_size_in_kb(output_path) - - if old_size > trigger_size and new_size <= old_size: - print( - f"Converted {filename} to {output_format.upper()}: {new_size:.2f}KB " - f"{get_size_reduction(old_size, new_size)}" - ) - converted_count += 1 - elif old_size <= trigger_size: - print( - f"Converted {filename} to {output_format.upper()}: {new_size:.2f}KB" - ) - else: - print( - f"Skipping conversion for {filename} as size increased " - f"({new_size:.2f} KB > {old_size:.2f} KB)" - ) - # os.remove(output_path) +def convert_images_in_tree(args): + filenames = args.get("filenames", None) + trigger_size = args.get("trigger_size", None) + converted_count = 0 + for image_path in filenames: + old_size = get_size_in_kb(image_path) + if old_size <= trigger_size: + continue + + # Note: the pre-commit hook takes care of ensuring only image files are passed here. + new_image_path = convert_image(image_path) + new_size = get_size_in_kb(new_image_path) + if new_size <= old_size: + print( + f"Converted png to jpg: {image_path}: {new_size:.2f}KB {get_size_reduction(old_size, new_size)}" + ) + converted_count += 1 + else: + print( + f"Skipping conversion for {image_path} as size is more than before ({new_size:.2f} KB > {old_size:.2f} KB)" + ) + os.remove(new_image_path) return converted_count + def parse_args(): + # construct the argument parse and parse the arguments argparser = argparse.ArgumentParser() argparser.add_argument( @@ -61,51 +50,31 @@ def parse_args(): dest="trigger_size", help="Specify minimum file size to trigger the hook.", ) - argparser.add_argument( - "--input-dir", - default=None, - required=False, - help="Specify the input directory for bulk conversion.", - ) - argparser.add_argument( - "--output-dir", - default=None, - required=True, - help="Specify the output directory for converted files.", - ) - argparser.add_argument( - "--format", - choices=['jpg', 'jpeg', 'png'], - default='jpeg', - help="Specify the output format (default: jpeg).", - ) argparser.add_argument("filenames", nargs="*", help="Files to optimize.") - args, unknown = argparser.parse_known_args() + ( + args, + unknown, + ) = argparser.parse_known_args() + + args = vars(args) if len(unknown) > 0: argparser.print_help() raise Exception(f"\nError: Unknown arguments: {unknown}") - return vars(args) + return args + if __name__ == "__main__": args = parse_args() + converted_count = convert_images_in_tree(args) trigger_size = args["trigger_size"] - output_dir = args["output_dir"] - output_format = args["format"] - - if args.get("input_dir"): - converted_count = bulk_convert(args["input_dir"], output_dir, output_format, trigger_size) - else: - print("No input directory specified. Please provide an input directory.") - exit(1) - if converted_count > 0: print( - f"Note: {converted_count} images above {trigger_size}KB were converted to {output_format.upper()}.\n" + f"Note: {converted_count} png images above {trigger_size}KB were converted to jpg.\nPlease manually remove the png files and add your commit again." ) exit(1) else: - print("All images are optimized. Commit accepted.") + # print("All sample images are jpgs. Commit accepted.") exit(0) \ No newline at end of file diff --git a/scripts/local/convert_images.py b/scripts/local/convert_images.py index 8cefd283..3653c18a 100644 --- a/scripts/local/convert_images.py +++ b/scripts/local/convert_images.py @@ -1,59 +1,38 @@ +#!/usr/bin/env python3 import argparse +import os +from scripts.local.utils.bulk_ops_common import convert_image -from scripts.local.utils.bulk_ops_common import add_common_args, run_argparser - - -def convert_image_to(): - # Wrapper to handle all available extensions - pass - - -def convert_images_in_tree(args): - input_directory = args.get("input", None) - recursive = args.get("recursive", None) - output_directory = args.get("output", None) - trigger_size = args.get("trigger_size", None) +def convert_images(filenames, output_format): converted_count = 0 - for image_path in filenames: - old_size = get_size_in_kb(image_path) - if old_size <= trigger_size: - continue - - # Note: the pre-commit hook takes care of ensuring only image files are passed here. - new_image_path = convert_image(image_path) - new_size = get_size_in_kb(new_image_path) - if new_size <= old_size: - print( - f"Converted png to jpg: {image_path}: {new_size:.2f}KB {get_size_reduction(old_size, new_size)}" - ) + for input_path in filenames: + if input_path.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.tif')): + name, ext = os.path.splitext(input_path) + output_path = f"{name}.{output_format.lower()}" + convert_image(input_path, output_path, output_format) + print(f"Converted {input_path} to {output_format}") converted_count += 1 else: - print( - f"Skipping conversion for {image_path} as size is more than before ({new_size:.2f} KB > {old_size:.2f} KB)" - ) - os.remove(new_image_path) - + print(f"Skipping unsupported file: {input_path}") return converted_count - def parse_args(): - # construct the argument parse and parse the arguments - argparser = argparse.ArgumentParser() - add_common_args(argparser, ["--input", "--output", "--trigger-size", "--recursive"]) - args = run_argparser(argparser) + parser = argparse.ArgumentParser(description="Convert images for pre-commit hook") + parser.add_argument( + "--format", + choices=['jpg', 'png', 'jpeg'], + default='jpg', + help="Output format for images (default: jpg)" + ) + parser.add_argument("filenames", nargs="*", help="Files to convert.") + args = parser.parse_args() return args - if __name__ == "__main__": args = parse_args() - - converted_count = convert_images_in_tree(args) - trigger_size = args["trigger_size"] + converted_count = convert_images(args.filenames, args.format.upper()) if converted_count > 0: - print( - f"Note: {converted_count} png images above {trigger_size}KB were converted to jpg.\nPlease manually remove the png files and add your commit again." - ) + print(f"Note: {converted_count} images were converted.") exit(1) else: - # print("All sample images are jpgs. Commit accepted.") - exit(0) + exit(0) \ No newline at end of file diff --git a/scripts/local/utils/bulk_ops_common.py b/scripts/local/utils/bulk_ops_common.py index 071a7137..f39cf2f6 100644 --- a/scripts/local/utils/bulk_ops_common.py +++ b/scripts/local/utils/bulk_ops_common.py @@ -3,43 +3,20 @@ import glob import operator import os - +from PIL import Image +from pdf2image import convert_from_path from src.utils.file import PathUtils -# TODO: add shell utilities for simple local images processing such as: -# From issue: https://github.com/Udayraj123/OMRChecker/issues/213 -# - bulk resize, -# - clip to max width (or height) -# - with a conditional trigger if the file size exceeds a provided value -# - bulk convert : -# - pdf to jpg -# - png to jpg or vice versa -# - tiff -# - bulk rename files -# - adding folder name to file name -# - removing non-utf characters from filename (to avoid processing errors) -# - add watermark to all images -# - blur a particular section of the images (e.g. student names and signatures) -# - create a gif from a folder of images -# - Save output of cropped pages to avoid cropping in each run (and merge with manually cropped images) -# - Save output of cropped markers to avoid cropping in each run (and merge with manually cropped images) - -# Make sure to be cross-os compatible i.e. use Posix paths wherever possible - - -# Maybe have a common util file for bulk ops and then create one file for each of the above util. - - -# Usual pre-processing commands for speedups (useful during re-runs) -# python3 scripts/local/convert_images.py -i inputs/ --replace [--filter-ext png,jpg] --output-ext jpg -# python3 scripts/local/resize_images.py -i inputs/ -o outputs --max-width=1500 - +# TODO: add shell utilities for bulk image processing, resizing, watermarking, etc. def walk_and_extract_files(input_dir, file_extensions): + """ + Walks through the directory to extract files with specified extensions. + """ extracted_files = [] for _dir, _subdir, _files in os.walk(input_dir): matching_globs = [ - glob(os.path.join(_dir, f"*.{file_extension}")) + glob.glob(os.path.join(_dir, f"*.{file_extension}")) for file_extension in file_extensions ] matching_files = functools.reduce(operator.iconcat, matching_globs, []) @@ -49,8 +26,11 @@ def walk_and_extract_files(input_dir, file_extensions): def get_local_argparser(): + """ + Returns an argument parser with common input, output, and optional recursive processing flags. + """ local_argparser = argparse.ArgumentParser() - + local_argparser.add_argument( "-i", "--input", @@ -72,8 +52,7 @@ def get_local_argparser(): local_argparser.add_argument( "-r", "--recursive", - required=True, - type=bool, + action='store_true', dest="recursive", help="Specify whether to process subdirectories recursively", ) @@ -81,20 +60,71 @@ def get_local_argparser(): local_argparser.add_argument( "--trigger-size", default=200, - required=True, type=int, dest="trigger_size", - help="Specify minimum file size to trigger the hook.", + help="Specify minimum file size (KB) to trigger the hook.", ) + return local_argparser +def convert_image(input_path, output_path, output_format): + """ + Converts an image to the specified output format. + """ + with Image.open(input_path) as img: + if output_format == 'JPG': + output_format = 'JPEG' + img.save(output_path, output_format) + + +def convert_pdf_to_jpg(input_path, output_dir): + """ + Converts a PDF to a series of JPG images, one per page. + """ + pages = convert_from_path(input_path) + for i, page in enumerate(pages): + output_path = os.path.join(output_dir, f"page_{i + 1}.jpg") + page.save(output_path, 'JPEG') + + +def bulk_convert(input_dir, output_dir, output_format, in_place=False): + """ + Bulk converts images and PDFs to the specified format. + """ + os.makedirs(output_dir, exist_ok=True) + extensions = ['png', 'jpg', 'jpeg', 'tiff', 'tif', 'pdf'] + + filepaths = walk_and_extract_files(input_dir, extensions) + + for input_path in filepaths: + relative_path = os.path.relpath(os.path.dirname(input_path), input_dir) + output_subdir = os.path.join(output_dir, relative_path) if not in_place else os.path.dirname(input_path) + os.makedirs(output_subdir, exist_ok=True) + + filename = os.path.basename(input_path) + if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.tif')): + name, _ = os.path.splitext(filename) + output_path = os.path.join(output_subdir, f"{name}.{output_format.lower()}") + convert_image(input_path, output_path, output_format) + print(f"Converted {filename} to {output_format}") + elif filename.lower().endswith('.pdf'): + pdf_output_dir = os.path.join(output_subdir, os.path.splitext(filename)[0]) + os.makedirs(pdf_output_dir, exist_ok=True) + convert_pdf_to_jpg(input_path, pdf_output_dir) + print(f"Converted {filename} to JPG") + else: + print(f"Skipping unsupported file: {filename}") + + def add_common_args(argparser, arguments): + """ + Adds arguments from the local argparser to the main argument parser. + """ local_argparser = get_local_argparser() for argument in arguments: for action in local_argparser._actions: if argument in action.option_strings: - # Copy the argument from local_argparser to argparser argparser.add_argument( *action.option_strings, dest=action.dest, @@ -107,15 +137,44 @@ def add_common_args(argparser, arguments): def run_argparser(argparser): - ( - args, - unknown, - ) = argparser.parse_known_args() - + """ + Runs the argument parser and returns parsed arguments. + """ + args, unknown = argparser.parse_known_args() args = vars(args) - if len(unknown) > 0: + if unknown: argparser.print_help() raise Exception(f"\nError: Unknown arguments: {unknown}") return args + + +def main(): + """ + Main entry point for the script. Handles argument parsing and starts the bulk conversion process. + """ + parser = argparse.ArgumentParser(description="Bulk image and PDF converter") + + # Add standard arguments + add_common_args(parser, ['-i', '--input', '-o', '--output', '--recursive', '--trigger-size']) + + parser.add_argument( + "--format", + choices=['jpg', 'png', 'jpeg'], + default='jpg', + help="Output format for images (default: jpg)" + ) + parser.add_argument( + "--in-place", + action='store_true', + help="Modify files in place" + ) + + args = run_argparser(parser) + + bulk_convert(args['input'], args['output'], args['format'].upper(), args['in_place']) + + +if __name__ == "__main__": + main()