From 65ad6ae01b54e7173b62eed910d1a643fc4ddaa8 Mon Sep 17 00:00:00 2001
From: Anushlinux <anushrutpandit@gmail.com>
Date: Thu, 3 Oct 2024 20:00:48 +0530
Subject: [PATCH 1/2] Update image conversion script to support multiple
 formats

---
 scripts/hooks/convert_images_hook.py | 111 ++++++++++++++++++---------
 1 file changed, 73 insertions(+), 38 deletions(-)

diff --git a/scripts/hooks/convert_images_hook.py b/scripts/hooks/convert_images_hook.py
index 52470361..dcbeb189 100755
--- a/scripts/hooks/convert_images_hook.py
+++ b/scripts/hooks/convert_images_hook.py
@@ -1,41 +1,56 @@
 #!/usr/bin/env python3
-"""Convert png images within the repository."""
-
+"""Convert png and other images within the repository."""
 
 import argparse
 import os
+import sys
+from PIL import Image
 
-from scripts.utils.image_utils import convert_image, get_size_in_kb, get_size_reduction
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from utils.image_utils import get_size_in_kb, get_size_reduction
 
+def convert_image(input_path, output_path, output_format):
+    with Image.open(input_path) as img:
+        if output_format == 'JPG':
+            output_format = 'JPEG'
+        img.save(output_path, output_format)
 
-def convert_images_in_tree(args):
-    filenames = args.get("filenames", None)
-    trigger_size = args.get("trigger_size", None)
+def bulk_convert(input_dir, output_dir, output_format, trigger_size):
+    os.makedirs(output_dir, exist_ok=True)
     converted_count = 0
-    for image_path in filenames:
-        old_size = get_size_in_kb(image_path)
-        if old_size <= trigger_size:
-            continue
-
-        # Note: the pre-commit hook takes care of ensuring only image files are passed here.
-        new_image_path = convert_image(image_path)
-        new_size = get_size_in_kb(new_image_path)
-        if new_size <= old_size:
-            print(
-                f"Converted png to jpg: {image_path}: {new_size:.2f}KB {get_size_reduction(old_size, new_size)}"
-            )
-            converted_count += 1
-        else:
-            print(
-                f"Skipping conversion for {image_path} as size is more than before ({new_size:.2f} KB > {old_size:.2f} KB)"
-            )
-            os.remove(new_image_path)
 
-    return converted_count
+    for root, _, files in os.walk(input_dir):
+        for filename in files:
+            input_path = os.path.join(root, filename)
+            if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.tif')):
+                old_size = get_size_in_kb(input_path)
+
+                name, ext = os.path.splitext(filename)
+                output_path = os.path.join(output_dir, f"{name}.{output_format.lower()}")
+
+                convert_image(input_path, output_path, output_format.upper())
+                new_size = get_size_in_kb(output_path)
+
+                if old_size > trigger_size and new_size <= old_size:
+                    print(
+                        f"Converted {filename} to {output_format.upper()}: {new_size:.2f}KB "
+                        f"{get_size_reduction(old_size, new_size)}"
+                    )
+                    converted_count += 1
+                elif old_size <= trigger_size:
+                    print(
+                        f"Converted {filename} to {output_format.upper()}: {new_size:.2f}KB"
+                    )
+                else:
+                    print(
+                        f"Skipping conversion for {filename} as size increased "
+                        f"({new_size:.2f} KB > {old_size:.2f} KB)"
+                    )
+                    # os.remove(output_path)
 
+    return converted_count
 
 def parse_args():
-    # construct the argument parse and parse the arguments
     argparser = argparse.ArgumentParser()
 
     argparser.add_argument(
@@ -46,31 +61,51 @@ def parse_args():
         dest="trigger_size",
         help="Specify minimum file size to trigger the hook.",
     )
+    argparser.add_argument(
+        "--input-dir",
+        default=None,
+        required=False,
+        help="Specify the input directory for bulk conversion.",
+    )
+    argparser.add_argument(
+        "--output-dir",
+        default=None,
+        required=True,
+        help="Specify the output directory for converted files.",
+    )
+    argparser.add_argument(
+        "--format",
+        choices=['jpg', 'jpeg', 'png'],
+        default='jpeg',
+        help="Specify the output format (default: jpeg).",
+    )
     argparser.add_argument("filenames", nargs="*", help="Files to optimize.")
 
-    (
-        args,
-        unknown,
-    ) = argparser.parse_known_args()
-
-    args = vars(args)
+    args, unknown = argparser.parse_known_args()
 
     if len(unknown) > 0:
         argparser.print_help()
         raise Exception(f"\nError: Unknown arguments: {unknown}")
-    return args
-
+    return vars(args)
 
 if __name__ == "__main__":
     args = parse_args()
 
-    converted_count = convert_images_in_tree(args)
     trigger_size = args["trigger_size"]
+    output_dir = args["output_dir"]
+    output_format = args["format"]
+
+    if args.get("input_dir"):
+        converted_count = bulk_convert(args["input_dir"], output_dir, output_format, trigger_size)
+    else:
+        print("No input directory specified. Please provide an input directory.")
+        exit(1)
+
     if converted_count > 0:
         print(
-            f"Note: {converted_count} png images above {trigger_size}KB were converted to jpg.\nPlease manually remove the png files and add your commit again."
+            f"Note: {converted_count} images above {trigger_size}KB were converted to {output_format.upper()}.\n"
         )
         exit(1)
     else:
-        # print("All sample images are jpgs. Commit accepted.")
-        exit(0)
+        print("All images are optimized. Commit accepted.")
+        exit(0)
\ No newline at end of file

From 84ddb531b2275a806ba131294f359ff010d958c9 Mon Sep 17 00:00:00 2001
From: Anushlinux <anushrutpandit@gmail.com>
Date: Fri, 4 Oct 2024 15:01:01 +0530
Subject: [PATCH 2/2] Restore hook script; move bulk conversion to scripts/local

---
 .pre-commit-config.yaml                |   2 +-
 scripts/hooks/convert_images_hook.py   | 113 +++++++------------
 scripts/local/convert_images.py        |  67 ++++--------
 scripts/local/utils/bulk_ops_common.py | 143 +++++++++++++++++--------
 4 files changed, 166 insertions(+), 159 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 8babe332..7008751c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -7,7 +7,7 @@ repos:
       - id: convert-images
         name: Convert png to jpg in samples
         entry: ./scripts/run_python_hook.sh scripts/hooks/convert_images_hook.py
-        args: ["--trigger-size", "150"]
+        args: ["--trigger-size", "150"]  # the hook's parser defines no --format option
         files: ^.*\.(png|PNG)$
         pass_filenames: true
         stages: [commit]
diff --git a/scripts/hooks/convert_images_hook.py b/scripts/hooks/convert_images_hook.py
index dcbeb189..9fdf5b00 100755
--- a/scripts/hooks/convert_images_hook.py
+++ b/scripts/hooks/convert_images_hook.py
@@ -1,56 +1,45 @@
 #!/usr/bin/env python3
-"""Convert png and other images within the repository."""
+"""Convert png images within the repository."""
+
 
 import argparse
 import os
-import sys
-from PIL import Image
 
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from utils.image_utils import get_size_in_kb, get_size_reduction
+import argparse
+import os
+from scripts.local.utils.bulk_ops_common import convert_image, convert_pdf_to_jpg
 
-def convert_image(input_path, output_path, output_format):
-    with Image.open(input_path) as img:
-        if output_format == 'JPG':
-            output_format = 'JPEG'
-        img.save(output_path, output_format)
+from scripts.utils.image_utils import convert_image, get_size_in_kb, get_size_reduction
 
-def bulk_convert(input_dir, output_dir, output_format, trigger_size):
-    os.makedirs(output_dir, exist_ok=True)
-    converted_count = 0
 
-    for root, _, files in os.walk(input_dir):
-        for filename in files:
-            input_path = os.path.join(root, filename)
-            if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.tif')):
-                old_size = get_size_in_kb(input_path)
-
-                name, ext = os.path.splitext(filename)
-                output_path = os.path.join(output_dir, f"{name}.{output_format.lower()}")
-
-                convert_image(input_path, output_path, output_format.upper())
-                new_size = get_size_in_kb(output_path)
-
-                if old_size > trigger_size and new_size <= old_size:
-                    print(
-                        f"Converted {filename} to {output_format.upper()}: {new_size:.2f}KB "
-                        f"{get_size_reduction(old_size, new_size)}"
-                    )
-                    converted_count += 1
-                elif old_size <= trigger_size:
-                    print(
-                        f"Converted {filename} to {output_format.upper()}: {new_size:.2f}KB"
-                    )
-                else:
-                    print(
-                        f"Skipping conversion for {filename} as size increased "
-                        f"({new_size:.2f} KB > {old_size:.2f} KB)"
-                    )
-                    # os.remove(output_path)
+def convert_images_in_tree(args):
+    filenames = args.get("filenames", None)
+    trigger_size = args.get("trigger_size", None)
+    converted_count = 0
+    for image_path in filenames:
+        old_size = get_size_in_kb(image_path)
+        if old_size <= trigger_size:
+            continue
+
+        # Note: the pre-commit hook takes care of ensuring only image files are passed here.
+        new_image_path = convert_image(image_path)
+        new_size = get_size_in_kb(new_image_path)
+        if new_size <= old_size:
+            print(
+                f"Converted png to jpg: {image_path}: {new_size:.2f}KB {get_size_reduction(old_size, new_size)}"
+            )
+            converted_count += 1
+        else:
+            print(
+                f"Skipping conversion for {image_path} as size is more than before ({new_size:.2f} KB > {old_size:.2f} KB)"
+            )
+            os.remove(new_image_path)
 
     return converted_count
 
+
 def parse_args():
+    # construct the argument parse and parse the arguments
     argparser = argparse.ArgumentParser()
 
     argparser.add_argument(
@@ -61,51 +50,31 @@ def parse_args():
         dest="trigger_size",
         help="Specify minimum file size to trigger the hook.",
     )
-    argparser.add_argument(
-        "--input-dir",
-        default=None,
-        required=False,
-        help="Specify the input directory for bulk conversion.",
-    )
-    argparser.add_argument(
-        "--output-dir",
-        default=None,
-        required=True,
-        help="Specify the output directory for converted files.",
-    )
-    argparser.add_argument(
-        "--format",
-        choices=['jpg', 'jpeg', 'png'],
-        default='jpeg',
-        help="Specify the output format (default: jpeg).",
-    )
     argparser.add_argument("filenames", nargs="*", help="Files to optimize.")
 
-    args, unknown = argparser.parse_known_args()
+    (
+        args,
+        unknown,
+    ) = argparser.parse_known_args()
+
+    args = vars(args)
 
     if len(unknown) > 0:
         argparser.print_help()
         raise Exception(f"\nError: Unknown arguments: {unknown}")
-    return vars(args)
+    return args
+
 
 if __name__ == "__main__":
     args = parse_args()
 
+    converted_count = convert_images_in_tree(args)
     trigger_size = args["trigger_size"]
-    output_dir = args["output_dir"]
-    output_format = args["format"]
-
-    if args.get("input_dir"):
-        converted_count = bulk_convert(args["input_dir"], output_dir, output_format, trigger_size)
-    else:
-        print("No input directory specified. Please provide an input directory.")
-        exit(1)
-
     if converted_count > 0:
         print(
-            f"Note: {converted_count} images above {trigger_size}KB were converted to {output_format.upper()}.\n"
+            f"Note: {converted_count} png images above {trigger_size}KB were converted to jpg.\nPlease manually remove the png files and add your commit again."
         )
         exit(1)
     else:
-        print("All images are optimized. Commit accepted.")
+        # print("All sample images are jpgs. Commit accepted.")
         exit(0)
\ No newline at end of file
diff --git a/scripts/local/convert_images.py b/scripts/local/convert_images.py
index 8cefd283..3653c18a 100644
--- a/scripts/local/convert_images.py
+++ b/scripts/local/convert_images.py
@@ -1,59 +1,38 @@
+#!/usr/bin/env python3
 import argparse
+import os
+from scripts.local.utils.bulk_ops_common import convert_image
 
-from scripts.local.utils.bulk_ops_common import add_common_args, run_argparser
-
-
-def convert_image_to():
-    # Wrapper to handle all available extensions
-    pass
-
-
-def convert_images_in_tree(args):
-    input_directory = args.get("input", None)
-    recursive = args.get("recursive", None)
-    output_directory = args.get("output", None)
-    trigger_size = args.get("trigger_size", None)
+def convert_images(filenames, output_format):
     converted_count = 0
-    for image_path in filenames:
-        old_size = get_size_in_kb(image_path)
-        if old_size <= trigger_size:
-            continue
-
-        # Note: the pre-commit hook takes care of ensuring only image files are passed here.
-        new_image_path = convert_image(image_path)
-        new_size = get_size_in_kb(new_image_path)
-        if new_size <= old_size:
-            print(
-                f"Converted png to jpg: {image_path}: {new_size:.2f}KB {get_size_reduction(old_size, new_size)}"
-            )
+    for input_path in filenames:
+        if input_path.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.tif')):
+            name, ext = os.path.splitext(input_path)
+            output_path = f"{name}.{output_format.lower()}"
+            convert_image(input_path, output_path, output_format)
+            print(f"Converted {input_path} to {output_format}")
             converted_count += 1
         else:
-            print(
-                f"Skipping conversion for {image_path} as size is more than before ({new_size:.2f} KB > {old_size:.2f} KB)"
-            )
-            os.remove(new_image_path)
-
+            print(f"Skipping unsupported file: {input_path}")
     return converted_count
 
-
 def parse_args():
-    # construct the argument parse and parse the arguments
-    argparser = argparse.ArgumentParser()
-    add_common_args(argparser, ["--input", "--output", "--trigger-size", "--recursive"])
-    args = run_argparser(argparser)
+    parser = argparse.ArgumentParser(description="Convert images for pre-commit hook")
+    parser.add_argument(
+        "--format", 
+        choices=['jpg', 'png', 'jpeg'], 
+        default='jpg', 
+        help="Output format for images (default: jpg)"
+    )
+    parser.add_argument("filenames", nargs="*", help="Files to convert.")
+    args = parser.parse_args()
     return args
 
-
 if __name__ == "__main__":
     args = parse_args()
-
-    converted_count = convert_images_in_tree(args)
-    trigger_size = args["trigger_size"]
+    converted_count = convert_images(args.filenames, args.format.upper())
     if converted_count > 0:
-        print(
-            f"Note: {converted_count} png images above {trigger_size}KB were converted to jpg.\nPlease manually remove the png files and add your commit again."
-        )
+        print(f"Note: {converted_count} images were converted.")
         exit(1)
     else:
-        # print("All sample images are jpgs. Commit accepted.")
-        exit(0)
+        exit(0)
\ No newline at end of file
diff --git a/scripts/local/utils/bulk_ops_common.py b/scripts/local/utils/bulk_ops_common.py
index 071a7137..f39cf2f6 100644
--- a/scripts/local/utils/bulk_ops_common.py
+++ b/scripts/local/utils/bulk_ops_common.py
@@ -3,43 +3,20 @@
 import glob
 import operator
 import os
-
+from PIL import Image
+from pdf2image import convert_from_path
 from src.utils.file import PathUtils
 
-# TODO: add shell utilities for simple local images processing such as:
-# From issue: https://github.com/Udayraj123/OMRChecker/issues/213
-# - bulk resize,
-#     - clip to max width (or height)
-#     -  with a conditional trigger if the file size exceeds a provided value
-# - bulk convert :
-#     - pdf to jpg
-#     - png to jpg or vice versa
-#     - tiff
-# - bulk rename files
-#     - adding folder name to file name
-#     - removing non-utf characters from filename (to avoid processing errors)
-# - add watermark to all images
-# - blur a particular section of the images (e.g. student names and signatures)
-# - create a gif from a folder of images
-# - Save output of cropped pages to avoid cropping in each run (and merge with manually cropped images)
-# - Save output of cropped markers to avoid cropping in each run (and merge with manually cropped images)
-
-# Make sure to be cross-os compatible i.e. use Posix paths wherever possible
-
-
-# Maybe have a common util file for bulk ops and then create one file for each of the above util.
-
-
-# Usual pre-processing commands for speedups (useful during re-runs)
-# python3 scripts/local/convert_images.py -i inputs/ --replace [--filter-ext png,jpg] --output-ext jpg
-# python3 scripts/local/resize_images.py -i inputs/ -o outputs --max-width=1500
-
+# TODO: add shell utilities for bulk image processing, resizing, watermarking, etc.
 
 def walk_and_extract_files(input_dir, file_extensions):
+    """
+    Walks through the directory to extract files with specified extensions.
+    """
     extracted_files = []
     for _dir, _subdir, _files in os.walk(input_dir):
         matching_globs = [
-            glob(os.path.join(_dir, f"*.{file_extension}"))
+            glob.glob(os.path.join(_dir, f"*.{file_extension}"))
             for file_extension in file_extensions
         ]
         matching_files = functools.reduce(operator.iconcat, matching_globs, [])
@@ -49,8 +26,11 @@ def walk_and_extract_files(input_dir, file_extensions):
 
 
 def get_local_argparser():
+    """
+    Returns an argument parser with common input, output, and optional recursive processing flags.
+    """
     local_argparser = argparse.ArgumentParser()
-
+    
     local_argparser.add_argument(
         "-i",
         "--input",
@@ -72,8 +52,7 @@ def get_local_argparser():
     local_argparser.add_argument(
         "-r",
         "--recursive",
-        required=True,
-        type=bool,
+        action='store_true',
         dest="recursive",
         help="Specify whether to process subdirectories recursively",
     )
@@ -81,20 +60,71 @@ def get_local_argparser():
     local_argparser.add_argument(
         "--trigger-size",
         default=200,
-        required=True,
         type=int,
         dest="trigger_size",
-        help="Specify minimum file size to trigger the hook.",
+        help="Specify minimum file size (KB) to trigger the hook.",
     )
+
     return local_argparser
 
 
+def convert_image(input_path, output_path, output_format):
+    """Convert the image at input_path to output_format and save it at output_path.
+
+    'JPG' (any case) means 'JPEG'; modes JPEG cannot store (e.g. RGBA, P) become RGB."""
+    output_format = 'JPEG' if output_format.upper() == 'JPG' else output_format.upper()
+    with Image.open(input_path) as img:
+        jpeg_ready = output_format != 'JPEG' or img.mode in ('1', 'L', 'RGB', 'CMYK')
+        (img if jpeg_ready else img.convert('RGB')).save(output_path, output_format)
+
+
+def convert_pdf_to_jpg(input_path, output_dir):
+    """
+    Renders each page of the PDF at input_path and saves it as page_<n>.jpg in output_dir.
+
+    Pages are numbered from 1. output_dir is not created here.
+    """
+    for page_number, page in enumerate(convert_from_path(input_path), start=1):
+        page.save(os.path.join(output_dir, f"page_{page_number}.jpg"), 'JPEG')
+
+
+def bulk_convert(input_dir, output_dir, output_format, in_place=False):
+    """
+    Bulk converts images and PDFs under input_dir, mirroring its tree under output_dir.
+    With in_place set, outputs go next to each source file and output_dir is not created.
+    """
+    if not in_place:
+        os.makedirs(output_dir, exist_ok=True)
+    image_suffixes = ('.png', '.jpg', '.jpeg', '.tiff', '.tif')
+    extensions = [suffix[1:] for suffix in image_suffixes] + ['pdf']
+    for input_path in walk_and_extract_files(input_dir, extensions):
+        source_dir = os.path.dirname(input_path)
+        relative_dir = os.path.relpath(source_dir, input_dir)
+        output_subdir = source_dir if in_place else os.path.join(output_dir, relative_dir)
+        os.makedirs(output_subdir, exist_ok=True)
+        filename = os.path.basename(input_path)
+        name, _ = os.path.splitext(filename)
+        if filename.lower().endswith(image_suffixes):
+            output_path = os.path.join(output_subdir, f"{name}.{output_format.lower()}")
+            convert_image(input_path, output_path, output_format)
+            print(f"Converted {filename} to {output_format}")
+        elif filename.lower().endswith('.pdf'):
+            pdf_output_dir = os.path.join(output_subdir, name)
+            os.makedirs(pdf_output_dir, exist_ok=True)
+            convert_pdf_to_jpg(input_path, pdf_output_dir)
+            print(f"Converted {filename} to JPG")
+        else:
+            print(f"Skipping unsupported file: {filename}")
+
+
 def add_common_args(argparser, arguments):
+    """
+    Adds arguments from the local argparser to the main argument parser.
+    """
     local_argparser = get_local_argparser()
     for argument in arguments:
         for action in local_argparser._actions:
             if argument in action.option_strings:
-                # Copy the argument from local_argparser to argparser
                 argparser.add_argument(
                     *action.option_strings,
                     dest=action.dest,
@@ -107,15 +137,44 @@ def add_common_args(argparser, arguments):
 
 
 def run_argparser(argparser):
-    (
-        args,
-        unknown,
-    ) = argparser.parse_known_args()
-
+    """
+    Runs the argument parser and returns parsed arguments.
+    """
+    args, unknown = argparser.parse_known_args()
     args = vars(args)
 
-    if len(unknown) > 0:
+    if unknown:
         argparser.print_help()
         raise Exception(f"\nError: Unknown arguments: {unknown}")
 
     return args
+
+
+def main():
+    """
+    Command-line entry point: parses the arguments and runs the bulk conversion.
+    """
+    parser = argparse.ArgumentParser(description="Bulk image and PDF converter")
+
+    # One flag per shared option: naming both "-i" and "--input" would register it twice.
+    add_common_args(parser, ['--input', '--output', '--recursive', '--trigger-size'])
+
+    parser.add_argument(
+        "--format",
+        choices=['jpg', 'png', 'jpeg'],
+        default='jpg',
+        help="Output format for images (default: jpg)",
+    )
+    parser.add_argument(
+        "--in-place",
+        action='store_true',
+        help="Modify files in place",
+    )
+
+    args = run_argparser(parser)
+
+    bulk_convert(args['input'], args['output'], args['format'].upper(), args['in_place'])
+
+
+if __name__ == "__main__":
+    main()